aboutsummaryrefslogtreecommitdiff
path: root/libpsn00b
diff options
context:
space:
mode:
authorspicyjpeg <thatspicyjpeg@gmail.com>2023-05-11 23:42:43 +0200
committerGitHub <noreply@github.com>2023-05-11 23:42:43 +0200
commit04d7728350cbd04dd86cd894e906c98673e3f9a7 (patch)
tree08e8c7dd495d1c4c6fcf5f7ba6b4b10693dc42f6 /libpsn00b
parenteaec942f56ceec9c14de5c4185a02602abadd50a (diff)
parent58a8306d24fe29d965aa8b40ddc37c3163c0a2f9 (diff)
downloadpsn00bsdk-04d7728350cbd04dd86cd894e906c98673e3f9a7.tar.gz
Merge pull request #70 from Lameguy64/v0.23-wip
Header cleanups, PCDRV, more safety checks, libc and mkpsxiso fixes (v0.23)
Diffstat (limited to 'libpsn00b')
-rw-r--r--libpsn00b/cmake/internal_setup.cmake15
-rw-r--r--libpsn00b/include/assert.h34
-rw-r--r--libpsn00b/include/cassert8
-rw-r--r--libpsn00b/include/cctype22
-rw-r--r--libpsn00b/include/cstdint34
-rw-r--r--libpsn00b/include/cstdio32
-rw-r--r--libpsn00b/include/cstdlib59
-rw-r--r--libpsn00b/include/cstring38
-rw-r--r--libpsn00b/include/ctype.h18
-rw-r--r--libpsn00b/include/dlfcn.h5
-rw-r--r--libpsn00b/include/elf.h18
-rw-r--r--libpsn00b/include/hwregs_c.h43
-rw-r--r--libpsn00b/include/inline_c.h5
-rw-r--r--libpsn00b/include/ioctl.h19
-rw-r--r--libpsn00b/include/lzp/lzp.h34
-rw-r--r--libpsn00b/include/lzp/lzqlp.h32
-rw-r--r--libpsn00b/include/malloc.h8
-rw-r--r--libpsn00b/include/psxapi.h116
-rw-r--r--libpsn00b/include/psxcd.h62
-rw-r--r--libpsn00b/include/psxetc.h55
-rw-r--r--libpsn00b/include/psxgpu.h375
-rw-r--r--libpsn00b/include/psxgte.h5
-rw-r--r--libpsn00b/include/psxkernel.h45
-rw-r--r--libpsn00b/include/psxpad.h5
-rw-r--r--libpsn00b/include/psxpress.h136
-rw-r--r--libpsn00b/include/psxsio.h5
-rw-r--r--libpsn00b/include/psxsn.h51
-rw-r--r--libpsn00b/include/psxspu.h29
-rw-r--r--libpsn00b/include/stdio.h41
-rw-r--r--libpsn00b/include/stdlib.h17
-rw-r--r--libpsn00b/include/string.h51
-rw-r--r--libpsn00b/include/strings.h5
-rw-r--r--libpsn00b/include/sys/fcntl.h12
-rw-r--r--libpsn00b/include/sys/ioctl.h13
-rw-r--r--libpsn00b/include/sys/types.h12
-rw-r--r--libpsn00b/libc/memcmp.s31
-rw-r--r--libpsn00b/libc/memcpy.s28
-rw-r--r--libpsn00b/libc/memmove.s42
-rw-r--r--libpsn00b/libc/start.c24
-rw-r--r--libpsn00b/libc/string.c604
-rw-r--r--libpsn00b/lzp/bit.h7
-rw-r--r--libpsn00b/lzp/compress.c16
-rw-r--r--libpsn00b/lzp/lzconfig.h (renamed from libpsn00b/include/lzconfig.h)40
-rw-r--r--libpsn00b/lzp/lzp.h34
-rw-r--r--libpsn00b/lzp/lzqlp.h32
-rw-r--r--libpsn00b/psxapi/_syscalls.s111
-rw-r--r--libpsn00b/psxapi/drivers.s32
-rw-r--r--libpsn00b/psxapi/fs.s18
-rw-r--r--libpsn00b/psxapi/stdio.s36
-rw-r--r--libpsn00b/psxapi/stubs.json62
-rw-r--r--libpsn00b/psxapi/sys.s58
-rw-r--r--libpsn00b/psxcd/cdread.c2
-rw-r--r--libpsn00b/psxcd/common.c16
-rw-r--r--libpsn00b/psxcd/isofs.c64
-rw-r--r--libpsn00b/psxcd/misc.c147
-rw-r--r--libpsn00b/psxetc/dl.c13
-rw-r--r--libpsn00b/psxetc/interrupts.c40
-rw-r--r--libpsn00b/psxgpu/common.c199
-rw-r--r--libpsn00b/psxgpu/drawing.c148
-rw-r--r--libpsn00b/psxgpu/env.c154
-rw-r--r--libpsn00b/psxgpu/font.c14
-rw-r--r--libpsn00b/psxgpu/image.c64
-rw-r--r--libpsn00b/psxpress/README.md29
-rw-r--r--libpsn00b/psxpress/mdec.c76
-rw-r--r--libpsn00b/psxpress/vlc.c160
-rw-r--r--libpsn00b/psxpress/vlc.s743
-rw-r--r--libpsn00b/psxpress/vlc2.c50
-rw-r--r--libpsn00b/psxsio/tty.c10
-rw-r--r--libpsn00b/psxspu/common.c36
69 files changed, 2925 insertions, 1674 deletions
diff --git a/libpsn00b/cmake/internal_setup.cmake b/libpsn00b/cmake/internal_setup.cmake
index 9e7a4d3..e34ff63 100644
--- a/libpsn00b/cmake/internal_setup.cmake
+++ b/libpsn00b/cmake/internal_setup.cmake
@@ -280,23 +280,22 @@ function(psn00bsdk_add_cd_image name image_name config_file)
cmake_path(HASH config_file _hash)
- set(CD_IMAGE_NAME ${image_name})
- set(CD_CONFIG_FILE ${CMAKE_CURRENT_BINARY_DIR}/cd_image_${_hash}.xml)
- configure_file("${config_file}" ${CD_CONFIG_FILE})
+ set(_xml_file ${CMAKE_CURRENT_BINARY_DIR}/cd_image_${_hash}.xml)
+ configure_file("${config_file}" ${_xml_file})
add_custom_command(
- OUTPUT ${CD_IMAGE_NAME}.bin ${CD_IMAGE_NAME}.cue
+ OUTPUT ${image_name}.bin ${image_name}.cue
COMMAND
${MKPSXISO} -y
- -o ${CD_IMAGE_NAME}.bin -c ${CD_IMAGE_NAME}.cue ${CD_CONFIG_FILE}
- COMMENT "Building CD image ${CD_IMAGE_NAME}"
+ -o ${image_name}.bin -c ${image_name}.cue ${_xml_file}
+ COMMENT "Building CD image ${image_name}"
VERBATIM
${ARGN}
)
add_custom_target(
${name} ALL
DEPENDS
- ${CMAKE_CURRENT_BINARY_DIR}/${CD_IMAGE_NAME}.bin
- ${CMAKE_CURRENT_BINARY_DIR}/${CD_IMAGE_NAME}.cue
+ ${CMAKE_CURRENT_BINARY_DIR}/${image_name}.bin
+ ${CMAKE_CURRENT_BINARY_DIR}/${image_name}.cue
)
endfunction()
diff --git a/libpsn00b/include/assert.h b/libpsn00b/include/assert.h
index 1b2bda2..8f8df74 100644
--- a/libpsn00b/include/assert.h
+++ b/libpsn00b/include/assert.h
@@ -1,13 +1,12 @@
/*
* PSn00bSDK assert macro and internal logging
- * (C) 2022 spicyjpeg - MPL licensed
+ * (C) 2022-2023 spicyjpeg - MPL licensed
*
- * Note that the _sdk_log() macro is used internally by PSn00bSDK to output
- * debug messages and warnings.
+ * The _sdk_*() macros are used internally by PSn00bSDK to output messages when
+ * building in debug mode.
*/
-#ifndef __ASSERT_H
-#define __ASSERT_H
+#pragma once
#include <stdio.h>
@@ -25,6 +24,9 @@ void _assert_abort(const char *file, int line, const char *expr);
#define assert(expr)
#define _sdk_log(fmt, ...)
+/* No-op stubs used when the checks are compiled out. Each stub takes the same
+ * parameters as the real definition of the macro in the #else branch of this
+ * header, so the two variants cannot drift apart unnoticed. */
+#define _sdk_assert(expr, ret, fmt, ...)
+#define _sdk_validate_args_void(expr)
+#define _sdk_validate_args(expr, ret)
#else
@@ -32,11 +34,27 @@ void _assert_abort(const char *file, int line, const char *expr);
((expr) ? ((void) 0) : _assert_abort(__FILE__, __LINE__, #expr))
#ifdef SDK_LIBRARY_NAME
-#define _sdk_log(fmt, ...) printf(SDK_LIBRARY_NAME ": " fmt, ##__VA_ARGS__)
+#define _sdk_log(fmt, ...) \
+ printf(SDK_LIBRARY_NAME ": " fmt __VA_OPT__(,) __VA_ARGS__)
#else
-#define _sdk_log(fmt, ...) printf(fmt, ##__VA_ARGS__)
+#define _sdk_log(fmt, ...) \
+ printf(fmt __VA_OPT__(,) __VA_ARGS__)
#endif
-#endif
+#define _sdk_assert(expr, ret, fmt, ...) \
+ if (!(expr)) { \
+ _sdk_log(fmt, __VA_ARGS__); \
+ return ret; \
+ }
+#define _sdk_validate_args_void(expr) \
+ if (!(expr)) { \
+ _sdk_log("invalid args to %s() (%s)\n", __func__, #expr); \
+ return; \
+ }
+#define _sdk_validate_args(expr, ret) \
+ if (!(expr)) { \
+ _sdk_log("invalid args to %s() (%s)\n", __func__, #expr); \
+ return ret; \
+ }
#endif
diff --git a/libpsn00b/include/cassert b/libpsn00b/include/cassert
new file mode 100644
index 0000000..0923486
--- /dev/null
+++ b/libpsn00b/include/cassert
@@ -0,0 +1,8 @@
+/*
+ * PSn00bSDK assert macro and internal logging
+ * (C) 2022-2023 spicyjpeg - MPL licensed
+ */
+
+#pragma once
+
+#include <assert.h>
diff --git a/libpsn00b/include/cctype b/libpsn00b/include/cctype
new file mode 100644
index 0000000..b73ad34
--- /dev/null
+++ b/libpsn00b/include/cctype
@@ -0,0 +1,22 @@
+/*
+ * PSn00bSDK standard library
+ * (C) 2019-2023 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed
+ */
+
+#pragma once
+
+namespace std {
+extern "C" {
+
+int isprint(int ch);
+int isgraph(int ch);
+int isspace(int ch);
+int isblank(int ch);
+int isalpha(int ch);
+int isdigit(int ch);
+
+int tolower(int ch);
+int toupper(int ch);
+
+}
+}
diff --git a/libpsn00b/include/cstdint b/libpsn00b/include/cstdint
new file mode 100644
index 0000000..3b1bc4a
--- /dev/null
+++ b/libpsn00b/include/cstdint
@@ -0,0 +1,34 @@
+/*
+ * PSn00bSDK standard library
+ * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed
+ *
+ * This is a replacement for the <cstdint> header included with GCC, which seems
+ * to be broken (at least in GCC 12.2.0) as it requires some macros to be set.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+namespace std {
+
+// Imports the exact-width, "fast" and "least" typedefs of the given width from
+// the global namespace (where <stdint.h> declares them) into namespace std.
+#define _DEF_TYPE(bits, prefix) \
+	using ::prefix##bits##_t; \
+	using ::prefix##_fast##bits##_t; \
+	using ::prefix##_least##bits##_t;
+
+_DEF_TYPE( 8, int)
+_DEF_TYPE( 8, uint)
+_DEF_TYPE(16, int)
+_DEF_TYPE(16, uint)
+_DEF_TYPE(32, int)
+_DEF_TYPE(32, uint)
+// 64-bit types are imported as well so that std::int64_t and std::uint64_t
+// resolve like the narrower widths do.
+_DEF_TYPE(64, int)
+_DEF_TYPE(64, uint)
+
+#undef _DEF_TYPE
+
+using ::intmax_t;
+using ::uintmax_t;
+using ::intptr_t;
+using ::uintptr_t;
+
+}
diff --git a/libpsn00b/include/cstdio b/libpsn00b/include/cstdio
new file mode 100644
index 0000000..800d1a2
--- /dev/null
+++ b/libpsn00b/include/cstdio
@@ -0,0 +1,32 @@
+/*
+ * PSn00bSDK standard library
+ * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed
+ */
+
+#pragma once
+
+#include <cstdarg>
+
+namespace std {
+extern "C" {
+
+/* String I/O API (provided by BIOS)
+ *
+ * NOTE(review): as declared here puts() and putchar() return void rather than
+ * int, and gets() takes no buffer size, so the caller has to make sure the
+ * destination buffer is large enough for whatever gets read into it.
+ */
+
+int printf(const char *fmt, ...);
+char *gets(char *str);
+void puts(const char *str);
+int getchar(void);
+void putchar(int ch);
+
+/* String formatting API (built-in)
+ *
+ * The buffer size taken by vsnprintf() and snprintf() is declared as
+ * unsigned int rather than size_t.
+ */
+
+int vsnprintf(char *string, unsigned int size, const char *fmt, va_list ap);
+int vsprintf(char *string, const char *fmt, va_list ap);
+int sprintf(char *string, const char *fmt, ...);
+int snprintf(char *string, unsigned int size, const char *fmt, ...);
+
+int vsscanf(const char *str, const char *format, va_list ap);
+int sscanf(const char *str, const char *fmt, ...);
+
+}
+}
diff --git a/libpsn00b/include/cstdlib b/libpsn00b/include/cstdlib
new file mode 100644
index 0000000..4fa859d
--- /dev/null
+++ b/libpsn00b/include/cstdlib
@@ -0,0 +1,59 @@
+/*
+ * PSn00bSDK standard library
+ * (C) 2019-2023 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed
+ */
+
+#pragma once
+
+#include <cstddef>
+
+namespace std {
+
+/* Definitions */
+
+static constexpr int RAND_MAX = 0x7fff;
+
+/* Structure definitions */
+
+struct HeapUsage {
+ size_t total; // Total size of heap + stack
+ size_t heap; // Amount of memory currently reserved for heap
+ size_t stack; // Amount of memory currently reserved for stack
+ size_t alloc; // Amount of memory currently allocated
+ size_t alloc_max; // Maximum amount of memory ever allocated
+};
+
+/* API */
+
+extern "C" {
+
+extern int __argc;
+extern const char **__argv;
+
+void abort(void);
+
+int rand(void);
+void srand(int seed);
+
+int abs(int j);
+long labs(long i);
+
+long strtol(const char *str, char **str_end, int base);
+long long strtoll(const char *str, char **str_end, int base);
+//float strtof(const char *str, char **str_end);
+//double strtod(const char *str, char **str_end);
+//long double strtold(const char *str, char **str_end);
+
+void InitHeap(void *addr, size_t size);
+void *sbrk(ptrdiff_t incr);
+
+void TrackHeapUsage(ptrdiff_t alloc_incr);
+void GetHeapUsage(HeapUsage *usage);
+
+void *malloc(size_t size);
+void *calloc(size_t num, size_t size);
+void *realloc(void *ptr, size_t size);
+void free(void *ptr);
+
+}
+}
diff --git a/libpsn00b/include/cstring b/libpsn00b/include/cstring
new file mode 100644
index 0000000..1ce7246
--- /dev/null
+++ b/libpsn00b/include/cstring
@@ -0,0 +1,38 @@
+/*
+ * PSn00bSDK standard library
+ * (C) 2019-2023 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed
+ */
+
+#pragma once
+
+#include <cstddef>
+
+namespace std {
+extern "C" {
+
+void *memset(void *dest, int ch, size_t count);
+void *memcpy(void *dest, const void *src, size_t count);
+void *memccpy(void *dest, const void *src, int ch, size_t count);
+void *memmove(void *dest, const void *src, size_t count);
+int memcmp(const void *lhs, const void *rhs, size_t count);
+void *memchr(const void *ptr, int ch, size_t count);
+
+char *strcpy(char *dest, const char *src);
+char *strncpy(char *dest, const char *src, size_t count);
+int strcmp(const char *lhs, const char *rhs);
+int strncmp(const char *lhs, const char *rhs, size_t count);
+char *strchr(const char *str, int ch);
+char *strrchr(const char *str, int ch);
+char *strpbrk(const char *str, const char *breakset);
+char *strstr(const char *str, const char *substr);
+
+size_t strlen(const char *str);
+char *strcat(char *dest, const char *src);
+char *strncat(char *dest, const char *src, size_t count);
+char *strdup(const char *str);
+char *strndup(const char *str, size_t count);
+
+char *strtok(char *str, const char *delim);
+
+}
+}
diff --git a/libpsn00b/include/ctype.h b/libpsn00b/include/ctype.h
index 24ee9d9..2fe0a42 100644
--- a/libpsn00b/include/ctype.h
+++ b/libpsn00b/include/ctype.h
@@ -1,20 +1,24 @@
/*
* PSn00bSDK standard library
- * (C) 2019-2022 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed
+ * (C) 2019-2023 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed
*/
-#ifndef __CTYPE_H
-#define __CTYPE_H
+#pragma once
#ifdef __cplusplus
extern "C" {
#endif
-int tolower(int chr);
-int toupper(int chr);
+int isprint(int ch);
+int isgraph(int ch);
+int isspace(int ch);
+int isblank(int ch);
+int isalpha(int ch);
+int isdigit(int ch);
+
+int tolower(int ch);
+int toupper(int ch);
#ifdef __cplusplus
}
#endif
-
-#endif
diff --git a/libpsn00b/include/dlfcn.h b/libpsn00b/include/dlfcn.h
index 6192430..5e1e3b6 100644
--- a/libpsn00b/include/dlfcn.h
+++ b/libpsn00b/include/dlfcn.h
@@ -3,8 +3,7 @@
* (C) 2021-2022 spicyjpeg - MPL licensed
*/
-#ifndef __DLFCN_H
-#define __DLFCN_H
+#pragma once
#include <stdint.h>
#include <stddef.h>
@@ -215,5 +214,3 @@ void *DL_GetDLLSymbol(const DLL *dll, const char *name);
#ifdef __cplusplus
}
#endif
-
-#endif
diff --git a/libpsn00b/include/elf.h b/libpsn00b/include/elf.h
index abfb3d5..b0ddf71 100644
--- a/libpsn00b/include/elf.h
+++ b/libpsn00b/include/elf.h
@@ -9,8 +9,7 @@
* converted to enums.
*/
-#ifndef __ELF_H
-#define __ELF_H
+#pragma once
#include <stdint.h>
@@ -121,18 +120,3 @@ typedef enum {
STT_LOPROC = 13, /* Start of processor-specific */
STT_HIPROC = 15 /* End of processor-specific */
} Elf32_st_type;
-
-// If you need to add more constants, you may use the following Python snippet
-// to quickly convert #defines to enums:
-/*
-import re
-t = """<paste #defines here>"""
-t = re.sub(
- r"(0x[0-9a-f]+|0b[01]+|[0-9]+)",
- lambda m: f"= {m.group(1)},",
- t.replace("#define ", "\t").replace("#define\t", "\t")
-)
-print("typedef enum {\n" + t + "\n} NAME;")
-*/
-
-#endif
diff --git a/libpsn00b/include/hwregs_c.h b/libpsn00b/include/hwregs_c.h
index 7015101..2152986 100644
--- a/libpsn00b/include/hwregs_c.h
+++ b/libpsn00b/include/hwregs_c.h
@@ -3,8 +3,7 @@
* (C) 2022 spicyjpeg - MPL licensed
*/
-#ifndef __HWREGS_C_H
-#define __HWREGS_C_H
+#pragma once
#include <stdint.h>
@@ -35,7 +34,7 @@
#define CD_DATA _MMIO8(IOBASE | 0x1802)
#define CD_IRQ _MMIO8(IOBASE | 0x1803)
-#define CD_REG(N) _MMIO8(IOBASE | 0x1800 + (N))
+#define CD_REG(N) _MMIO8((IOBASE | 0x1800) + (N))
/* SPU */
@@ -74,13 +73,13 @@
// These are not named SPU_VOICE_* to avoid name clashes with SPU attribute
// flags defined in psxspu.h.
-#define SPU_CH_VOL_L(N) _MMIO16(IOBASE | 0x1c00 + 16 * (N))
-#define SPU_CH_VOL_R(N) _MMIO16(IOBASE | 0x1c02 + 16 * (N))
-#define SPU_CH_FREQ(N) _MMIO16(IOBASE | 0x1c04 + 16 * (N))
-#define SPU_CH_ADDR(N) _MMIO16(IOBASE | 0x1c06 + 16 * (N))
-#define SPU_CH_ADSR1(N) _MMIO16(IOBASE | 0x1c08 + 16 * (N))
-#define SPU_CH_ADSR2(N) _MMIO16(IOBASE | 0x1c0a + 16 * (N))
-#define SPU_CH_LOOP_ADDR(N) _MMIO16(IOBASE | 0x1c0e + 16 * (N))
+#define SPU_CH_VOL_L(N) _MMIO16((IOBASE | 0x1c00) + (16 * (N)))
+#define SPU_CH_VOL_R(N) _MMIO16((IOBASE | 0x1c02) + (16 * (N)))
+#define SPU_CH_FREQ(N) _MMIO16((IOBASE | 0x1c04) + (16 * (N)))
+#define SPU_CH_ADDR(N) _MMIO16((IOBASE | 0x1c06) + (16 * (N)))
+#define SPU_CH_ADSR1(N) _MMIO16((IOBASE | 0x1c08) + (16 * (N)))
+#define SPU_CH_ADSR2(N) _MMIO16((IOBASE | 0x1c0a) + (16 * (N)))
+#define SPU_CH_LOOP_ADDR(N) _MMIO16((IOBASE | 0x1c0e) + (16 * (N)))
/* MDEC */
@@ -92,11 +91,11 @@
// IMPORTANT: even though SIO_DATA is a 32-bit register, it should only be
// accessed as 8-bit. Reading it as 16 or 32-bit works fine on real hardware,
// but leads to problems in some emulators.
-#define SIO_DATA(N) _MMIO8 (IOBASE | 0x1040 + 16 * (N))
-#define SIO_STAT(N) _MMIO16(IOBASE | 0x1044 + 16 * (N))
-#define SIO_MODE(N) _MMIO16(IOBASE | 0x1048 + 16 * (N))
-#define SIO_CTRL(N) _MMIO16(IOBASE | 0x104a + 16 * (N))
-#define SIO_BAUD(N) _MMIO16(IOBASE | 0x104e + 16 * (N))
+#define SIO_DATA(N) _MMIO8 ((IOBASE | 0x1040) + (16 * (N)))
+#define SIO_STAT(N) _MMIO16((IOBASE | 0x1044) + (16 * (N)))
+#define SIO_MODE(N) _MMIO16((IOBASE | 0x1048) + (16 * (N)))
+#define SIO_CTRL(N) _MMIO16((IOBASE | 0x104a) + (16 * (N)))
+#define SIO_BAUD(N) _MMIO16((IOBASE | 0x104e) + (16 * (N)))
/* IRQ controller */
@@ -108,15 +107,15 @@
#define DMA_DPCR _MMIO32(IOBASE | 0x10f0)
#define DMA_DICR _MMIO32(IOBASE | 0x10f4)
-#define DMA_MADR(N) _MMIO32(IOBASE | 0x1080 + 16 * (N))
-#define DMA_BCR(N) _MMIO32(IOBASE | 0x1084 + 16 * (N))
-#define DMA_CHCR(N) _MMIO32(IOBASE | 0x1088 + 16 * (N))
+#define DMA_MADR(N) _MMIO32((IOBASE | 0x1080) + (16 * (N)))
+#define DMA_BCR(N) _MMIO32((IOBASE | 0x1084) + (16 * (N)))
+#define DMA_CHCR(N) _MMIO32((IOBASE | 0x1088) + (16 * (N)))
/* Timers */
-#define TIMER_VALUE(N) _MMIO32(IOBASE | 0x1100 + 16 * (N))
-#define TIMER_CTRL(N) _MMIO32(IOBASE | 0x1104 + 16 * (N))
-#define TIMER_RELOAD(N) _MMIO32(IOBASE | 0x1108 + 16 * (N))
+#define TIMER_VALUE(N) _MMIO32((IOBASE | 0x1100) + (16 * (N)))
+#define TIMER_CTRL(N) _MMIO32((IOBASE | 0x1104) + (16 * (N)))
+#define TIMER_RELOAD(N) _MMIO32((IOBASE | 0x1108) + (16 * (N)))
/* Memory/bus control */
@@ -130,5 +129,3 @@
#define BUS_EXP2_CFG _MMIO32(IOBASE | 0x101c)
#define BUS_COM_DELAY _MMIO32(IOBASE | 0x1020)
#define BUS_RAM_SIZE _MMIO32(IOBASE | 0x1060)
-
-#endif
diff --git a/libpsn00b/include/inline_c.h b/libpsn00b/include/inline_c.h
index 5facc1c..cb550b9 100644
--- a/libpsn00b/include/inline_c.h
+++ b/libpsn00b/include/inline_c.h
@@ -16,8 +16,7 @@
* compiled object files.
*/
-#ifndef _INLINE_C_H
-#define _INLINE_C_H
+#pragma once
/* GTE load macros */
@@ -1612,5 +1611,3 @@
: "g"( r0 ) )
#define gte_mvmva_b(sf, mx, v, cv, lm) gte_mvmva_core_b( 0x0400012 | \
((sf)<<19) | ((mx)<<17) | ((v)<<15) | ((cv)<<13) | ((lm)<<10) )
-
-#endif // _INLINE_C_H
diff --git a/libpsn00b/include/ioctl.h b/libpsn00b/include/ioctl.h
deleted file mode 100644
index 5c56422..0000000
--- a/libpsn00b/include/ioctl.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef _IOCTL_H
-#define _IOCTL_H
-
-#ifndef NULL
-#define NULL 0
-#endif
-
-#ifndef EOF
-#define EOF -1
-#endif
-
-// General
-#define FIONBLOCK (('f'<<8)|1)
-#define FIOCSCAN (('f'<<8)|2)
-
-// disk
-#define DIO_FORMAT (('d'<<8)|1)
-
-#endif \ No newline at end of file
diff --git a/libpsn00b/include/lzp/lzp.h b/libpsn00b/include/lzp/lzp.h
index 456de02..1aeea30 100644
--- a/libpsn00b/include/lzp/lzp.h
+++ b/libpsn00b/include/lzp/lzp.h
@@ -1,20 +1,29 @@
-/*! \file lzp.h
- * \brief Main library header
+/*
+ * liblzp data compression library
+ * (C) 2019 Lameguy64 - MPL licensed
*/
-/*! \mainpage
- * \version 0.20b
- * \author John Wilbert 'Lameguy64' Villamor
+/**
+ * @file lzp.h
+ * @brief Utility library for file bundling and compression
*
- * \section creditsSection Credits
- * - LZ77 data compression/decompression routines based from Ilya Muravyov's
- * crush.cpp released under public domain. Refined and ported to C by Lameguy64.
- * - CRC calculation routines based from Lammert Bies' lib_crc routines.
+ * @details This library implements a simple in-memory archive format which
+ * can be used to package and compress assets for faster loading, as well as a
+ * generic LZ77 compressor and matching decompressor. Two archive formats are
+ * supported, one uncompressed (.QLP) and one with individually compressed
+ * entries (.LZP).
*
+ * This header provides the LZ77 compression API and functions to parse and
+ * decompress .LZP archives after they have been loaded into memory.
+ *
+ * @section creditsSection Credits
+ * - LZ77 data compression/decompression routines based from Ilya Muravyov's
+ * crush.cpp released under public domain. Refined and ported to C by
+ * Lameguy64.
+ * - CRC calculation routines based from Lammert Bies' lib_crc routines.
*/
-#ifndef _LZPACK_H
-#define _LZPACK_H
+#pragma once
#include <stdint.h>
#ifdef _WIN32
@@ -218,6 +227,3 @@ int lzpUnpackFile(void* buff, const LZP_HEAD* lzpack, int fileNum);
#ifdef __cplusplus
}
#endif
-
-
-#endif // _LZPACK_H
diff --git a/libpsn00b/include/lzp/lzqlp.h b/libpsn00b/include/lzp/lzqlp.h
index 32ce0d7..127f263 100644
--- a/libpsn00b/include/lzp/lzqlp.h
+++ b/libpsn00b/include/lzp/lzqlp.h
@@ -1,5 +1,23 @@
-#ifndef _QLP_H
-#define _QLP_H
+/*
+ * liblzp data compression library
+ * (C) 2019 Lameguy64 - MPL licensed
+ */
+
+/**
+ * @file lzqlp.h
+ * @brief Utility library for file bundling
+ *
+ * @details This library implements a simple in-memory archive format which
+ * can be used to package and compress assets for faster loading, as well as a
+ * generic LZ77 compressor and matching decompressor. Two archive formats are
+ * supported, one uncompressed (.QLP) and one with individually compressed
+ * entries (.LZP).
+ *
+ * This header provides functions to parse .QLP archives and retrieve pointers
+ * to their contents after they have been loaded into memory.
+ */
+
+#pragma once
#include <stdint.h>
#ifdef _WIN32
@@ -23,9 +41,17 @@ typedef struct {
uint32_t offs;
} QLP_FILE;
+
+// Function prototypes
+#ifdef __cplusplus
+extern "C" {
+#endif
+
int qlpFileCount(const QLP_HEAD* qlpfile);
const QLP_FILE* qlpFileEntry(int index, const QLP_HEAD* qlpfile);
const void* qlpFileAddr(int index, const QLP_HEAD* qlpfile);
int qlpFindFile(char* fileName, const QLP_HEAD* qlpfile);
-#endif // _QLP_H \ No newline at end of file
+#ifdef __cplusplus
+}
+#endif
diff --git a/libpsn00b/include/malloc.h b/libpsn00b/include/malloc.h
deleted file mode 100644
index 75c3711..0000000
--- a/libpsn00b/include/malloc.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _MALLOC_H
-#define _MALLOC_H
-
-#warning "<malloc.h> is deprecated, include <stdlib.h> instead"
-
-#include <stdlib.h>
-
-#endif // _MALLOC_H \ No newline at end of file
diff --git a/libpsn00b/include/psxapi.h b/libpsn00b/include/psxapi.h
index 7353ed2..35ee040 100644
--- a/libpsn00b/include/psxapi.h
+++ b/libpsn00b/include/psxapi.h
@@ -1,10 +1,21 @@
/*
* PSn00bSDK kernel API library
- * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed
+ * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed
*/
-#ifndef __PSXAPI_H
-#define __PSXAPI_H
+/**
+ * @file psxapi.h
+ * @brief Kernel API library header
+ *
+ * @details This header provides access to most of the APIs made available by
+ * the system's BIOS, including basic file I/O, TTY output, controller and
+ * memory card drivers, threads, events as well as kernel memory allocation.
+ *
+ * For more information and up-to-date documentation on kernel APIs, see:
+ * https://psx-spx.consoledev.net/kernelbios/
+ */
+
+#pragma once
#include <stdint.h>
#include <stddef.h>
@@ -12,13 +23,38 @@
/* Definitions */
-#define DescHW 0xf0000000
-#define DescSW 0xf4000000
-
-#define HwCARD (DescHW|0x11)
-#define HwCARD_1 (DescHW|0x12)
-#define HwCARD_0 (DescHW|0x13)
-#define SwCARD (DescHW|0x02)
+// TODO: these desperately need to be cleaned up
+
+#define SEEK_SET 0
+#define SEEK_CUR 1
+#define SEEK_END 2
+
+#define DescMask 0xff000000 // Event descriptor mask
+#define DescTH DescMask
+#define DescHW 0xf0000000 // Hardware event (IRQ)
+#define DescEV 0xf1000000 // Event event
+#define DescRC 0xf2000000 // Root counter event
+#define DescUEV 0xf3000000 // User event
+#define DescSW 0xf4000000 // BIOS event
+
+#define HwVBLANK (DescHW|0x01) // VBlank
+#define HwGPU (DescHW|0x02) // GPU
+#define HwCdRom (DescHW|0x03) // CDROM
+#define HwDMAC (DescHW|0x04) // DMA
+#define HwRTC0 (DescHW|0x05) // Timer 0
+#define HwRTC1 (DescHW|0x06) // Timer 1
+#define HwRTC2 (DescHW|0x07) // Timer 2
+#define HwCNTL (DescHW|0x08) // Controller
+#define HwSPU (DescHW|0x09) // SPU
+#define HwPIO (DescHW|0x0a) // PIO & lightgun
+#define HwSIO (DescHW|0x0b) // Serial
+
+#define HwCPU (DescHW|0x10) // Processor exception
+#define HwCARD (DescHW|0x11) // Memory card (lower level BIOS functions)
+#define HwCard_0 (DescHW|0x12)
+#define HwCard_1 (DescHW|0x13)
+#define SwCARD (DescSW|0x01) // Memory card (higher level BIOS functions)
+#define SwMATH (DescSW|0x02)
#define EvSpIOE 0x0004
#define EvSpERROR 0x8000
@@ -135,8 +171,6 @@ struct JMP_BUF {
uint32_t gp;
};
-// Not recommended to use these functions to install IRQ handlers
-
typedef struct {
uint32_t *next;
uint32_t *func2;
@@ -158,7 +192,8 @@ typedef struct {
#define FastExitCriticalSection() \
(IRQ_MASK = __saved_irq_mask)
-/*#define FastEnterCriticalSection() { \
+#if 0
+#define FastEnterCriticalSection() { \
uint32_t r0, r1; \
__asm__ volatile( \
"mfc0 %0, $12;" \
@@ -179,9 +214,10 @@ typedef struct {
"nop;" \
: "=r"(r0) :: \
); \
-}*/
+}
+#endif
-/* API */
+/* BIOS API */
#ifdef __cplusplus
extern "C" {
@@ -199,23 +235,28 @@ int DisableEvent(int event);
void DeliverEvent(uint32_t cl, uint32_t spec);
void UnDeliverEvent(uint32_t cl, uint32_t spec);
-int open(const char *name, int mode);
+int open(const char *path, int mode);
int close(int fd);
-int seek(int fd, uint32_t offset, int mode);
-int read(int fd, uint8_t *buff, size_t len);
-int write(int fd, const uint8_t *buff, size_t len);
+int lseek(int fd, uint32_t offset, int mode);
+int read(int fd, void *buff, size_t len);
+int write(int fd, const void *buff, size_t len);
+int getc(int fd);
+int putc(int ch, int fd);
int ioctl(int fd, int cmd, int arg);
+int isatty(int fd);
struct DIRENTRY *firstfile(const char *wildcard, struct DIRENTRY *entry);
struct DIRENTRY *nextfile(struct DIRENTRY *entry);
-int erase(const char *name);
-int chdir(const char *path);
+int erase(const char *path);
+int undelete(const char *path);
+int cd(const char *path);
-//#define cd(p) chdir(p)
+int _get_errno(void);
+int _get_error(int fd);
-int AddDev(DCB *dcb);
-int DelDev(const char *name);
-void ListDev(void);
-void AddDummyTty(void);
+int AddDrv(DCB *dcb);
+int DelDrv(const char *name);
+void ListDrv(void);
+void add_nullcon_driver(void);
int EnterCriticalSection(void);
void ExitCriticalSection(void);
@@ -254,30 +295,33 @@ int ResetRCnt(int spec);
void ChangeClearPAD(int mode);
void ChangeClearRCnt(int t, int m);
-uint32_t OpenTh(uint32_t (*func)(), uint32_t sp, uint32_t gp);
-int CloseTh(uint32_t thread);
-int ChangeTh(uint32_t thread);
+int OpenTh(uint32_t (*func)(), uint32_t sp, uint32_t gp);
+int CloseTh(int thread);
+int ChangeTh(int thread);
-int Exec(struct EXEC *exec, int argc, char **argv);
+int Exec(struct EXEC *exec, int argc, const char **argv);
+int LoadExec(const char *path, int argc, const char **argv);
void FlushCache(void);
void b_setjmp(struct JMP_BUF *buf);
void b_longjmp(const struct JMP_BUF *buf, int param);
-void SetDefaultExitFromException(void);
-void SetCustomExitFromException(const struct JMP_BUF *buf);
+void ResetEntryInt(void);
+void HookEntryInt(const struct JMP_BUF *buf);
void ReturnFromException(void);
+int SetConf(int evcb, int tcb, uint32_t sp);
+void GetConf(int *evcb, int *tcb, uint32_t *sp);
+void SetMem(int size);
+
int GetSystemInfo(int index);
void *GetB0Table(void);
void *GetC0Table(void);
-void *_kernel_malloc(int size);
-void _kernel_free(void *ptr);
+void *alloc_kernel_memory(int size);
+void free_kernel_memory(void *ptr);
void _boot(void);
#ifdef __cplusplus
}
#endif
-
-#endif
diff --git a/libpsn00b/include/psxcd.h b/libpsn00b/include/psxcd.h
index 503bc83..fc9c391 100644
--- a/libpsn00b/include/psxcd.h
+++ b/libpsn00b/include/psxcd.h
@@ -21,8 +21,7 @@
* library extension is considered for future development.
*/
-#ifndef __PSXCD_H
-#define __PSXCD_H
+#pragma once
#include <stdint.h>
@@ -811,6 +810,48 @@ int CdMode(void);
int CdStatus(void);
/**
+ * @brief Returns the CD-ROM controller's region code.
+ *
+ * @details Reads region information from the drive using a CdlTest command.
+ * This can be used to reliably determine the system's region without having to
+ * resort to workarounds like probing the BIOS ROM.
+ *
+ * This function may return incorrect results and trigger error callbacks on
+ * emulators or consoles equipped with CD-ROM drive emulation devices such as
+ * the PSIO. It is not affected by modchips.
+ *
+ * @return Region code or 0 if the region cannot be determined
+ */
+CdlRegionCode CdGetRegion(void);
+
+/**
+ * @brief Attempts to disable the CD-ROM controller's region check.
+ *
+ * @details Sends undocumented commands to the drive in an attempt to disable
+ * the region string check, in order to allow reading data from non-PS1 discs
+ * as well as CD-Rs without needing a modchip. As unlocking commands are region
+ * specific, the drive's region must be obtained beforehand using CdGetRegion()
+ * and passed to this function. The unlock persists even if the lid is opened,
+ * but not if a CdlReset command is issued.
+ *
+ * Unlocking is only supported on US, European and Net Yaroze consoles (not on
+ * Japanese models, devkits and most emulators). This function will return 1
+ * without doing anything if CdlRegionDebug is passed as region, as debug
+ * consoles can already read unlicensed discs.
+ *
+ * NOTE: if any callbacks were set using CdReadyCallback() or CdSyncCallback()
+ * prior to calling CdUnlock(), they will be called with an error code as part
+ * of the unlocking sequence, even if the unlock was successful. It is thus
+ * recommended to call this function before setting any callbacks.
+ *
+ * @param region
+ * @return 1 if the drive was successfully unlocked, 0 otherwise
+ *
+ * @see CdGetRegion()
+ */
+int CdUnlock(CdlRegionCode region);
+
+/**
* @brief Retrieves the disc's table of contents.
*
* @details Retrieves the track entries from a CD's table of contents (TOC). The
@@ -832,21 +873,6 @@ int CdStatus(void);
int CdGetToc(CdlLOC *toc);
/**
- * @brief Returns the CD-ROM controller's region code.
- *
- * @details Attempts to fetch region information from the drive using a CdlTest
- * command. This can be used to reliably determine the system's region without
- * having to resort to workarounds like probing the BIOS ROM.
- *
- * This function may return incorrect results on emulators or consoles equipped
- * with CD-ROM drive emulation devices such as the PSIO. It is not affected by
- * modchips.
- *
- * @return Region code or 0 if the region cannot be determined
- */
-CdlRegionCode CdGetRegion(void);
-
-/**
* @brief Sets the CD-ROM volume mixing matrix.
*
* @details Sets the volume levels of the CD-ROM drive's audio output (used for
@@ -1046,5 +1072,3 @@ int CdLoadSession(int session);
#ifdef __cplusplus
}
#endif
-
-#endif
diff --git a/libpsn00b/include/psxetc.h b/libpsn00b/include/psxetc.h
index ae4611e..8dd1dd5 100644
--- a/libpsn00b/include/psxetc.h
+++ b/libpsn00b/include/psxetc.h
@@ -1,6 +1,6 @@
/*
* PSn00bSDK interrupt management library
- * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed
+ * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed
*/
/**
@@ -13,8 +13,7 @@
* separate header).
*/
-#ifndef __PSXETC_H
-#define __PSXETC_H
+#pragma once
/* IRQ and DMA channel definitions */
@@ -68,7 +67,7 @@ extern "C" {
* | ID | Channel | Used by |
* | --: | :--------------- | :-------------------------------------- |
* | 0 | IRQ_VBLANK | psxgpu (use VSyncCallback() instead) |
- * | 1 | IRQ_GPU | |
+ * | 1 | IRQ_GPU | psxgpu (use DrawSyncCallback() instead) |
* | 2 | IRQ_CD | psxcd (use CdReadyCallback() instead) |
* | 3 | IRQ_DMA | psxetc (use DMACallback() instead) |
* | 4 | IRQ_TIMER0 | |
@@ -156,19 +155,50 @@ void *DMACallback(DMA_Channel dma, void (*func)(void));
void *GetDMACallback(DMA_Channel dma);
/**
- * @brief Initializes the interrupt dispatcher.
+ * @brief Enables, disables or sets the priority of a DMA channel.
+ *
+ * @details Enables the specified DMA channel and configures its priority (if
+ * priority >= 0) or disables it (if priority = -1). The priority value must be
+ * in 0-7 range, with 0 being the highest priority and 7 the lowest.
+ *
+ * All channels are disabled upon calling ResetCallback(); most libraries will
+ * re-enable them as needed. By default the priority is set to 3 for all
+ * channels.
+ *
+ * @param dma
+ * @param priority Priority in 0-7 range or -1 to disable the channel
+ * @return Previously set priority in 0-7 range, -1 if the channel was disabled
+ */
+int SetDMAPriority(DMA_Channel dma, int priority);
+
+/**
+ * @brief Gets the priority of a DMA channel.
+ *
+ * @details Returns the currently set priority value for the specified DMA
+ * channel in 0-7 range, with 0 being the highest priority and 7 the lowest.
+ * Returns -1 if the channel is not enabled.
+ *
+ * @param dma
+ * @return Priority in 0-7 range, -1 if the channel is disabled
+ *
+ * @see SetDMAPriority()
+ */
+int GetDMAPriority(DMA_Channel dma);
+
+/**
+ * @brief Initializes the interrupt dispatcher and DMA controller.
*
* @details Sets up the interrupt handling system, hooks the BIOS to dispatch
- * interrupts to the library and clears all registered callbacks. This function
- * must be called once at the beginning of the program, prior to registering
- * any IRQ or DMA callbacks.
+ * interrupts to the library, clears all registered callbacks and disables all
+ * DMA channels. This function must be called once at the beginning of the
+ * program, prior to registering any IRQ or DMA callbacks.
*
* ResetCallback() is called by psxgpu's ResetGraph(), so invoking it manually
* is usually not required. Calling ResetCallback() after ResetGraph() will
* actually result in improper initialization, as ResetGraph() registers
* several callbacks used internally by psxgpu.
*
- * @return 0 or -1 if the was already initialized
+ * @return 0 or -1 if the dispatcher was already initialized
*/
int ResetCallback(void);
@@ -196,6 +226,11 @@ void RestartCallback(void);
* Note that interrupts are (obviously) disabled until RestartCallback() is
* called.
*
+ * WARNING: any ongoing background processing or DMA transfer must be stopped
+ * before calling StopCallback(), otherwise crashes may occur. This includes
+ * flushing psxgpu's command queue using DrawSync(), stopping CD-ROM reading
+ * and calling StopPAD() to disable the BIOS controller driver if used.
+ *
* @see RestartCallback()
*/
void StopCallback(void);
@@ -203,5 +238,3 @@ void StopCallback(void);
#ifdef __cplusplus
}
#endif
-
-#endif
diff --git a/libpsn00b/include/psxgpu.h b/libpsn00b/include/psxgpu.h
index 26e560f..d7f1ad5 100644
--- a/libpsn00b/include/psxgpu.h
+++ b/libpsn00b/include/psxgpu.h
@@ -1,10 +1,26 @@
/*
* PSn00bSDK GPU library
- * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed
+ * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed
*/
-#ifndef __PSXGPU_H
-#define __PSXGPU_H
+/**
+ * @file psxgpu.h
+ * @brief GPU library header
+ *
+ * @details This library provides access to the PS1's GPU through a fully
+ * asynchronous command queue, which allows GPU commands to be batched and sent
+ * efficiently in the background without stalling the CPU. Helper structures
+ * and macros to initialize, generate and link GPU display lists in memory are
+ * also provided, in addition to support for asynchronous VRAM data transfers
+ * and a debug font API that can be used to easily draw text overlays for
+ * debugging purposes.
+ *
+ * This library is for the most part a drop-in replacement for the official
+ * SDK's GPU library and is only missing a handful of functions, mainly related
+ * to Kanji debug fonts and command queue pausing.
+ */
+
+#pragma once
#include <stdint.h>
#include <stddef.h>
@@ -28,6 +44,11 @@ typedef enum _GPU_VideoMode {
MODE_PAL = 1
} GPU_VideoMode;
+typedef enum _GPU_DrawOpType {
+ DRAWOP_TYPE_DMA = 1,
+ DRAWOP_TYPE_GPU_IRQ = 2
+} GPU_DrawOpType;
+
/* Structure macros */
#define setVector(v, _x, _y, _z) \
@@ -83,7 +104,7 @@ typedef enum _GPU_VideoMode {
(p)->u0 = (_u0), (p)->v0 = (_v0), \
(p)->u1 = (_u1), (p)->v1 = (_v1), \
(p)->u2 = (_u2), (p)->v2 = (_v2)
-
+
#define setUV4(p, _u0, _v0, _u1, _v1, _u2, _v2, _u3, _v3) \
(p)->u0 = (_u0), (p)->v0 = (_v0), \
(p)->u1 = (_u1), (p)->v1 = (_v1), \
@@ -101,9 +122,12 @@ typedef enum _GPU_VideoMode {
#define setlen(p, _len) (((P_TAG *) (p))->len = (uint8_t) (_len))
#define setaddr(p, _addr) (((P_TAG *) (p))->addr = (uint32_t) (_addr))
#define setcode(p, _code) (((P_TAG *) (p))->code = (uint8_t) (_code))
+#define setcode_T(p, _code) (((P_TAG_T *) (p))->code = (uint8_t) (_code))
+
#define getlen(p) (((P_TAG *) (p))->len)
#define getaddr(p) (((P_TAG *) (p))->addr)
#define getcode(p) (((P_TAG *) (p))->code)
+#define getcode_T(p) (((P_TAG_T *) (p))->code)
#define nextPrim(p) (void *) (0x80000000 | (((P_TAG *) (p))->addr))
#define isendprim(p) ((((P_TAG *) (p))->addr) == 0xffffff)
@@ -114,16 +138,20 @@ typedef enum _GPU_VideoMode {
#define setSemiTrans(p, abe) \
((abe) ? (getcode(p) |= 2) : (getcode(p) &= ~2))
+#define setSemiTrans_T(p, abe) \
+ ((abe) ? (getcode_T(p) |= 2) : (getcode_T(p) &= ~2))
#define setShadeTex(p, tge) \
((tge) ? (getcode(p) |= 1) : (getcode(p) &= ~1))
+#define setShadeTex_T(p, tge) \
+ ((tge) ? (getcode_T(p) |= 1) : (getcode_T(p) &= ~1))
#define getTPage(tp, abr, x, y) ( \
- (((x) / 64) & 15) | \
- ((((y) / 256) & 1) << 4) | \
- (((abr) & 3) << 5) | \
- (((tp) & 3) << 7) | \
- ((((y) / 512) & 1) << 11) \
+ (((x) & 0x3c0) >> 6) | \
+ (((y) & 0x100) >> 4) | \
+ (((y) & 0x200) << 2) | \
+ (((abr) & 3) << 5) | \
+ (((tp) & 3) << 7) \
)
#define getClut(x, y) (((y) << 6) | (((x) >> 4) & 0x3f))
@@ -147,59 +175,109 @@ typedef enum _GPU_VideoMode {
#define setTile(p) setlen(p, 3), setcode(p, 0x60)
#define setLineF2(p) setlen(p, 3), setcode(p, 0x40)
#define setLineG2(p) setlen(p, 4), setcode(p, 0x50)
-#define setLineF3(p) setlen(p, 5), setcode(p, 0x48), \
- (p)->pad = 0x55555555
-#define setLineG3(p) setlen(p, 7), setcode(p, 0x58), \
- (p)->pad = 0x55555555, (p)->p1 = 0, (p)->p2 = 0
-#define setLineF4(p) setlen(p, 6), setcode(p, 0x4c), \
- (p)->pad = 0x55555555
-#define setLineG4(p) setlen(p, 9), setcode(p, 0x5c), \
- (p)->pad = 0x55555555, (p)->p1 = 0, (p)->p2 = 0, (p)->p3 = 0
-#define setFill(p) setlen(p, 3), setcode(p, 0x02)
-#define setVram2Vram(p) setlen(p, 8), setcode(p, 0x80), \
+#define setLineF3(p) setlen(p, 5), setcode(p, 0x48), (p)->pad = 0x55555555
+#define setLineG3(p) setlen(p, 7), setcode(p, 0x58), (p)->pad = 0x55555555, \
+ (p)->p1 = 0, (p)->p2 = 0
+#define setLineF4(p) setlen(p, 6), setcode(p, 0x4c), (p)->pad = 0x55555555
+#define setLineG4(p) setlen(p, 9), setcode(p, 0x5c), (p)->pad = 0x55555555, \
+ (p)->p1 = 0, (p)->p2 = 0, (p)->p3 = 0
+#define setFill(p) setlen(p, 3), setcode(p, 0x02)
+#define setBlit(p) setlen(p, 8), setcode(p, 0x80), \
(p)->pad[0] = 0, (p)->pad[1] = 0, (p)->pad[2] = 0, (p)->pad[3] = 0
-#define setDrawTPage(p, dfe, dtd, tpage) \
- setlen(p, 1), \
+#define setPolyF3_T(p) setcode_T(p, 0x20)
+#define setPolyFT3_T(p) setcode_T(p, 0x24)
+#define setPolyG3_T(p) setcode_T(p, 0x30)
+#define setPolyGT3_T(p) setcode_T(p, 0x34)
+#define setPolyF4_T(p) setcode_T(p, 0x28)
+#define setPolyFT4_T(p) setcode_T(p, 0x2c)
+#define setPolyG4_T(p) setcode_T(p, 0x38)
+#define setPolyGT4_T(p) setcode_T(p, 0x3c)
+#define setSprt8_T(p) setcode_T(p, 0x74)
+#define setSprt16_T(p) setcode_T(p, 0x7c)
+#define setSprt_T(p) setcode_T(p, 0x64)
+#define setTile1_T(p) setcode_T(p, 0x68)
+#define setTile8_T(p) setcode_T(p, 0x70)
+#define setTile16_T(p) setcode_T(p, 0x78)
+#define setTile_T(p) setcode_T(p, 0x60)
+#define setLineF2_T(p) setcode_T(p, 0x40)
+#define setLineG2_T(p) setcode_T(p, 0x50)
+#define setLineF3_T(p) setcode_T(p, 0x48), (p)->pad = 0x55555555
+#define setLineG3_T(p) setcode_T(p, 0x58), (p)->pad = 0x55555555, \
+ (p)->p1 = 0, (p)->p2 = 0
+#define setLineF4_T(p) setcode_T(p, 0x4c), (p)->pad = 0x55555555
+#define setLineG4_T(p) setcode_T(p, 0x5c), (p)->pad = 0x55555555, \
+ (p)->p1 = 0, (p)->p2 = 0, (p)->p3 = 0
+#define setFill_T(p) setcode_T(p, 0x02)
+#define setBlit_T(p) setcode_T(p, 0x80), \
+ (p)->pad[0] = 0, (p)->pad[1] = 0, (p)->pad[2] = 0, (p)->pad[3] = 0
+
+#define setDrawTPage_T(p, dfe, dtd, tpage) \
(p)->code[0] = (0xe1000000 | \
(tpage) | \
- ((dtd) << 9) | \
- ((dfe) << 10) \
+ (((dtd) & 1) << 9) | \
+ (((dfe) & 1) << 10) \
)
+#define setDrawTPage(p, dfe, dtd, tpage) \
+ setlen(p, 1), setDrawTPage_T(p, dfe, dtd, tpage)
-#define setDrawOffset(p, _x, _y) \
- setlen(p, 1), \
- (p)->code[0] = (0xe5000000 | \
- ((_x) % 1024) | \
- (((_y) % 1024) << 11) \
+#define setTexWindow_T(p, r) \
+ (p)->code[0] = (0xe2000000 | \
+ ((r)->w & 0x1f) | \
+ (((r)->h & 0x1f) << 5) | \
+ (((r)->x & 0x1f) << 10) | \
+ (((r)->y & 0x1f) << 15) \
)
+#define setTexWindow(p, r) \
+ setlen(p, 1), setTexWindow_T(p, r)
-#define setDrawMask(p, sb, mt) \
- setlen(p, 1), \
- (p)->code[0] = (0xe6000000 | (sb) | ((mt) << 1))
-
-#define setDrawArea(p, r) \
- setlen(p, 2), \
+#define setDrawAreaXY_T(p, _x0, _y0, _x1, _y1) \
(p)->code[0] = (0xe3000000 | \
- ((r)->x % 1024) | \
- (((r)->y % 1024) << 10) \
+ ((_x0) & 0x3ff) | \
+ (((_y0) & 0x3ff) << 10) \
), \
(p)->code[1] = (0xe4000000 | \
- (((r)->x + (r)->w - 1) % 1024) | \
- ((((r)->y + (r)->h - 1) % 1024) << 10) \
+ ((_x1) & 0x3ff) | \
+ (((_y1) & 0x3ff) << 10) \
)
+#define setDrawAreaXY(p, _x0, _y0, _x1, _y1) \
+ setlen(p, 2), setDrawAreaXY_T(p, _x0, _y0, _x1, _y1)
+
+#define setDrawArea_T(p, r) \
+ setDrawAreaXY_T(p, \
+ (r)->x, \
+ (r)->y, \
+ (r)->x + (r)->w - 1, \
+ (r)->y + (r)->h - 1 \
+ )
+#define setDrawArea(p, r) \
+ setlen(p, 2), setDrawArea_T(p, r)
-#define setTexWindow(p, r) \
- setlen(p, 1), \
- (p)->code[0] = (0xe2000000 | \
- ((r)->w % 32) | \
- (((r)->h % 32) << 5) | \
- (((r)->x % 32) << 10) | \
- (((r)->y % 32) << 15) \
+#define setDrawOffset_T(p, _x, _y) \
+ (p)->code[0] = (0xe5000000 | \
+ ((_x) & 0x7ff) | \
+ (((_y) & 0x7ff) << 11) \
)
+#define setDrawOffset(p, _x, _y) \
+ setlen(p, 1), setDrawOffset_T(p, _x, _y)
+
+#define setDrawStp_T(p, pbw, mt) \
+ (p)->code[0] = (0xe6000000 | (pbw) | ((mt) << 1))
+#define setDrawStp(p, pbw, mt) \
+ setlen(p, 1), setDrawStp_T(p, pbw, mt)
+
+#define setDrawIRQ_T(p) \
+ (p)->code[0] = 0x1f000000
+#define setDrawIRQ(p) \
+ setlen(p, 1), setDrawIRQ_T(p)
/* Primitive structure definitions */
+typedef struct _P_TAG_T {
+ uint32_t color:24;
+ uint32_t code:8;
+} P_TAG_T;
+
typedef struct _P_TAG {
uint32_t addr:24;
uint32_t len:8;
@@ -212,25 +290,31 @@ typedef struct _P_COLOR {
uint32_t pad:8;
} P_COLOR;
-typedef struct _POLY_F3 {
- uint32_t tag;
+// These macros are used to define two variants of each primitive, a regular one
+// and a "tagless" one (_T suffix) without the OT/display list header.
+#define _DEF_PRIM(name, ...) \
+ typedef struct _##name##_T { __VA_ARGS__ } name##_T; \
+ typedef struct _##name { uint32_t tag; __VA_ARGS__ } name;
+#define _DEF_ALIAS(name, target) \
+ typedef struct _##target##_T name##_T; \
+ typedef struct _##target name;
+
+_DEF_PRIM(POLY_F3,
uint8_t r0, g0, b0, code;
int16_t x0, y0;
int16_t x1, y1;
int16_t x2, y2;
-} POLY_F3;
+)
-typedef struct _POLY_F4 {
- uint32_t tag;
+_DEF_PRIM(POLY_F4,
uint8_t r0, g0, b0, code;
int16_t x0, y0;
int16_t x1, y1;
int16_t x2, y2;
int16_t x3, y3;
-} POLY_F4;
+)
-typedef struct _POLY_FT3 {
- uint32_t tag;
+_DEF_PRIM(POLY_FT3,
uint8_t r0, g0, b0, code;
int16_t x0, y0;
uint8_t u0, v0;
@@ -241,10 +325,9 @@ typedef struct _POLY_FT3 {
int16_t x2, y2;
uint8_t u2, v2;
uint16_t pad;
-} POLY_FT3;
+)
-typedef struct _POLY_FT4 {
- uint32_t tag;
+_DEF_PRIM(POLY_FT4,
uint8_t r0, g0, b0, code;
uint16_t x0, y0;
uint8_t u0, v0;
@@ -258,20 +341,18 @@ typedef struct _POLY_FT4 {
int16_t x3, y3;
uint8_t u3, v3;
uint16_t pad1;
-} POLY_FT4;
+)
-typedef struct _POLY_G3 {
- uint32_t tag;
+_DEF_PRIM(POLY_G3,
uint8_t r0, g0, b0, code;
int16_t x0, y0;
uint8_t r1, g1, b1, pad0;
int16_t x1, y1;
uint8_t r2, g2, b2, pad1;
int16_t x2, y2;
-} POLY_G3;
+)
-typedef struct _POLY_G4 {
- uint32_t tag;
+_DEF_PRIM(POLY_G4,
uint8_t r0, g0, b0, code;
int16_t x0, y0;
uint8_t r1, g1, b1, pad0;
@@ -280,10 +361,9 @@ typedef struct _POLY_G4 {
int16_t x2, y2;
uint8_t r3, g3, b3, pad2;
int16_t x3, y3;
-} POLY_G4;
+)
-typedef struct _POLY_GT3 {
- uint32_t tag;
+_DEF_PRIM(POLY_GT3,
uint8_t r0, g0, b0, code;
int16_t x0, y0;
uint8_t u0, v0;
@@ -296,10 +376,9 @@ typedef struct _POLY_GT3 {
int16_t x2, y2;
uint8_t u2, v2;
uint16_t pad2;
-} POLY_GT3;
+)
-typedef struct _POLY_GT4 {
- uint32_t tag;
+_DEF_PRIM(POLY_GT4,
uint8_t r0, g0, b0, code;
int16_t x0, y0;
uint8_t u0, v0;
@@ -316,34 +395,30 @@ typedef struct _POLY_GT4 {
int16_t x3, y3;
uint8_t u3, v3;
uint16_t pad4;
-} POLY_GT4;
+)
-typedef struct _LINE_F2 {
- uint32_t tag;
+_DEF_PRIM(LINE_F2,
uint8_t r0, g0, b0, code;
int16_t x0, y0;
int16_t x1, y1;
-} LINE_F2;
+)
-typedef struct _LINE_G2 {
- uint32_t tag;
+_DEF_PRIM(LINE_G2,
uint8_t r0, g0, b0, code;
int16_t x0, y0;
uint8_t r1, g1, b1, p1;
int16_t x1, y1;
-} LINE_G2;
+)
-typedef struct _LINE_F3 {
- uint32_t tag;
+_DEF_PRIM(LINE_F3,
uint8_t r0, g0, b0, code;
int16_t x0, y0;
int16_t x1, y1;
int16_t x2, y2;
uint32_t pad;
-} LINE_F3;
+)
-typedef struct _LINE_G3 {
- uint32_t tag;
+_DEF_PRIM(LINE_G3,
uint8_t r0, g0, b0, code;
int16_t x0, y0;
uint8_t r1, g1, b1, p1;
@@ -351,20 +426,18 @@ typedef struct _LINE_G3 {
uint8_t r2, g2, b2, p2;
int16_t x2, y2;
uint32_t pad;
-} LINE_G3;
+)
-typedef struct _LINE_F4 {
- uint32_t tag;
+_DEF_PRIM(LINE_F4,
uint8_t r0, g0, b0, code;
int16_t x0, y0;
int16_t x1, y1;
int16_t x2, y2;
int16_t x3, y3;
uint32_t pad;
-} LINE_F4;
+)
-typedef struct _LINE_G4 {
- uint32_t tag;
+_DEF_PRIM(LINE_G4,
uint8_t r0, g0, b0, code;
int16_t x0, y0;
uint8_t r1, g1, b1, p1;
@@ -374,88 +447,80 @@ typedef struct _LINE_G4 {
uint8_t r3, g3, b3, p3;
int16_t x3, y3;
uint32_t pad;
-} LINE_G4;
+)
-typedef struct _TILE {
- uint32_t tag;
+_DEF_PRIM(TILE,
uint8_t r0, g0, b0, code;
int16_t x0, y0;
int16_t w, h;
-} TILE;
+)
-struct _TILE_FIXED {
- uint32_t tag;
+_DEF_PRIM(TILE_1,
uint8_t r0, g0, b0, code;
int16_t x0, y0;
-};
-typedef struct _TILE_FIXED TILE_1;
-typedef struct _TILE_FIXED TILE_8;
-typedef struct _TILE_FIXED TILE_16;
+)
+_DEF_ALIAS(TILE_8, TILE_1)
+_DEF_ALIAS(TILE_16, TILE_1)
-typedef struct _SPRT {
- uint32_t tag;
+_DEF_PRIM(SPRT,
uint8_t r0, g0, b0, code;
int16_t x0, y0;
uint8_t u0, v0;
uint16_t clut;
uint16_t w, h;
-} SPRT;
+)
-struct _SPRT_FIXED {
- uint32_t tag;
+_DEF_PRIM(SPRT_1,
uint8_t r0, g0, b0, code;
int16_t x0, y0;
uint8_t u0, v0;
uint16_t clut;
-};
-typedef struct _SPRT_FIXED SPRT_8;
-typedef struct _SPRT_FIXED SPRT_16;
-
-typedef struct _DR_ENV {
- uint32_t tag;
- uint32_t code[8];
-} DR_ENV;
-
-typedef struct _DR_AREA {
- uint32_t tag;
- uint32_t code[2];
-} DR_AREA;
-
-typedef struct _DR_OFFSET {
- uint32_t tag;
- uint32_t code[1];
-} DR_OFFSET;
-
-typedef struct _DR_TWIN {
- uint32_t tag;
- uint32_t code[2];
-} DR_TWIN;
-
-typedef struct _DR_TPAGE {
- uint32_t tag;
- uint32_t code[1];
-} DR_TPAGE;
-
-typedef struct _DR_MASK {
- uint32_t tag;
- uint32_t code[1];
-} DR_MASK;
+)
+_DEF_ALIAS(SPRT_8, SPRT_1)
+_DEF_ALIAS(SPRT_16, SPRT_1)
-typedef struct _FILL {
- uint32_t tag;
+_DEF_PRIM(FILL,
uint8_t r0, g0, b0, code;
- uint16_t x0, y0; // Note: coordinates must be in 16 pixel steps
+ uint16_t x0, y0;
uint16_t w, h;
-} FILL;
+)
-typedef struct _VRAM2VRAM {
- uint32_t tag;
+_DEF_PRIM(DR_MOVE,
uint8_t p0, p1, p2, code;
uint16_t x0, y0;
uint16_t x1, y1;
uint16_t w, h;
- uint32_t pad[4];
-} VRAM2VRAM;
+)
+
+_DEF_PRIM(DR_AREA,
+ uint32_t code[2];
+)
+_DEF_PRIM(DR_OFFSET,
+ uint32_t code[1];
+)
+_DEF_PRIM(DR_TWIN,
+ uint32_t code[1];
+)
+_DEF_PRIM(DR_TPAGE,
+ uint32_t code[1];
+)
+_DEF_PRIM(DR_STP,
+ uint32_t code[1];
+)
+_DEF_PRIM(DR_IRQ,
+ uint32_t code[1];
+)
+
+_DEF_PRIM(DR_ENV,
+ DR_TPAGE_T tpage;
+ DR_TWIN_T twin;
+ DR_AREA_T area;
+ DR_OFFSET_T offset;
+ FILL_T fill;
+)
+
+#undef _DEF_PRIM
+#undef _DEF_ALIAS
/* Structure definitions */
@@ -478,13 +543,13 @@ typedef struct _DISPENV {
typedef struct _DRAWENV {
RECT clip; // Drawing area
int16_t ofs[2]; // GPU draw offset (relative to draw area)
- RECT tw; // Texture window (doesn't do anything atm)
+ RECT tw; // Texture window
uint16_t tpage; // Initial tpage value
uint8_t dtd; // Dither processing flag (simply OR'ed to tpage)
uint8_t dfe; // Drawing to display area blocked/allowed (simply OR'ed to tpage)
uint8_t isbg; // Clear draw area if non-zero
uint8_t r0, g0, b0; // Draw area clear color (if isbg is nonzero)
- DR_ENV dr_env; // Draw mode packet area (used by PutDrawEnv)
+ DR_ENV dr_env; // GPU primitive cache area (used internally)
} DRAWENV;
typedef struct _TIM_IMAGE {
@@ -521,31 +586,35 @@ void PutDrawEnv(DRAWENV *env);
void PutDrawEnvFast(DRAWENV *env);
int GetODE(void);
+int IsIdleGPU(int timeout);
int VSync(int mode);
void *VSyncHaltFunction(void (*func)(void));
void *VSyncCallback(void (*func)(void));
-int EnqueueDrawOp(
- void (*func)(uint32_t, uint32_t, uint32_t),
- uint32_t arg1,
- uint32_t arg2,
- uint32_t arg3
-);
+void SetDrawOpType(GPU_DrawOpType type);
+int EnqueueDrawOp(void (*func)(), uint32_t arg1, uint32_t arg2, uint32_t arg3);
int DrawSync(int mode);
void *DrawSyncCallback(void (*func)(void));
int LoadImage(const RECT *rect, const uint32_t *data);
int StoreImage(const RECT *rect, uint32_t *data);
-//int MoveImage(const RECT *rect, int x, int y);
+int MoveImage(const RECT *rect, int x, int y);
void LoadImage2(const RECT *rect, const uint32_t *data);
void StoreImage2(const RECT *rect, uint32_t *data);
-//void MoveImage2(const RECT *rect, int x, int y);
+void MoveImage2(const RECT *rect, int x, int y);
void ClearOTagR(uint32_t *ot, size_t length);
void ClearOTag(uint32_t *ot, size_t length);
int DrawOTag(const uint32_t *ot);
+int DrawOTagIRQ(const uint32_t *ot);
int DrawOTagEnv(const uint32_t *ot, DRAWENV *env);
+int DrawOTagEnvIRQ(const uint32_t *ot, DRAWENV *env);
void DrawOTag2(const uint32_t *ot);
+void DrawOTagIRQ2(const uint32_t *ot);
+int DrawBuffer(const uint32_t *buf, size_t length);
+int DrawBufferIRQ(const uint32_t *buf, size_t length);
+void DrawBuffer2(const uint32_t *buf, size_t length);
+void DrawBufferIRQ2(const uint32_t *buf, size_t length);
void DrawPrim(const uint32_t *pri);
void AddPrim(uint32_t *ot, const void *pri);
@@ -565,5 +634,3 @@ char *FntFlush(int id);
#ifdef __cplusplus
}
#endif
-
-#endif
diff --git a/libpsn00b/include/psxgte.h b/libpsn00b/include/psxgte.h
index 91dfd6a..2200a55 100644
--- a/libpsn00b/include/psxgte.h
+++ b/libpsn00b/include/psxgte.h
@@ -14,8 +14,7 @@
* registers and issue commands to the GTE.
*/
-#ifndef __PSXGTE_H
-#define __PSXGTE_H
+#pragma once
#include <stdint.h>
@@ -259,5 +258,3 @@ void Square0(VECTOR *v0, VECTOR *v1);
#ifdef __cplusplus
}
#endif
-
-#endif
diff --git a/libpsn00b/include/psxkernel.h b/libpsn00b/include/psxkernel.h
deleted file mode 100644
index 0c55bcb..0000000
--- a/libpsn00b/include/psxkernel.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef _PSXKERNEL_H
-#define _PSXKERNEL_H
-
-// Event descriptors
-#define DescMask 0xff000000 // Event descriptor mask
-#define DescTH DescMask
-#define DescHW 0xf0000000 // Hardware event (IRQ)
-#define DescEV 0xf1000000 // Event event
-#define DescRC 0xf2000000 // Root counter event
-#define DescUEV 0xf3000000 // User event
-#define DescSW 0xf4000000 // BIOS event
-
-// Hardware events
-#define HwVBLANK (DescHW|0x01) // VBlank
-#define HwGPU (DescHW|0x02) // GPU
-#define HwCdRom (DescHW|0x03) // CDROM
-#define HwDMAC (DescHW|0x04) // DMA
-#define HwRTC0 (DescHW|0x05) // Timer 0
-#define HwRTC1 (DescHW|0x06) // Timer 1
-#define HwRTC2 (DescHW|0x07) // Timer 2
-#define HwCNTL (DescHW|0x08) // Controller
-#define HwSPU (DescHW|0x09) // SPU
-#define HwPIO (DescHW|0x0a) // PIO & lightgun
-#define HwSIO (DescHW|0x0b) // Serial
-
-#define HwCPU (DescHW|0x10) // Processor exception
-#define HwCARD (DescHW|0x11) // Memory card (lower level BIOS functions)
-#define HwCard_0 (DescHW|0x12) // Memory card (unused)
-#define HwCard_1 (DescHW|0x13) // Memory card (unused)
-#define SwCARD (DescSW|0x01) // Memory card (higher level BIOS functions)
-#define SwMATH (DescSW|0x02) // Libmath related apparently, unknown purpose
-
-#define RCntCNT0 (DescRC|0x00) // Root counter 0 (dot clock)
-#define RCntCNT1 (DescRC|0x01) // Horizontal sync
-#define RCntCNT2 (DescRC|0x02) // 1/8 of system clock
-#define RCntCNT3 (DescRC|0x03) // Vertical blank
-
-#define RCntMdINTR 0x1000 // General interrupt
-#define RCntMdNOINTR 0x2000 // New device
-#define RCntMdSC 0x0001 // Counter becomes zero
-#define RCntMdSP 0x0000 // Unknown purpose
-#define RCntMdFR 0x0000
-#define RCntMdGATE 0x0010 // Command acknowledged
-
-#endif // _PSXKERNEL_H \ No newline at end of file
diff --git a/libpsn00b/include/psxpad.h b/libpsn00b/include/psxpad.h
index 32f7f8a..09f28c4 100644
--- a/libpsn00b/include/psxpad.h
+++ b/libpsn00b/include/psxpad.h
@@ -11,8 +11,7 @@
* Reference: https://gist.github.com/scanlime/5042071
*/
-#ifndef _PSXPAD_H
-#define _PSXPAD_H
+#pragma once
#include <stdint.h>
@@ -234,5 +233,3 @@ typedef struct __attribute__((packed)) _MemCardRequest {
uint8_t checksum; // = lba_h ^ lba_l ^ data (CMD_WRITE only)
uint8_t dummy2[3];
} MemCardRequest;
-
-#endif \ No newline at end of file
diff --git a/libpsn00b/include/psxpress.h b/libpsn00b/include/psxpress.h
index dc1d52c..f26e030 100644
--- a/libpsn00b/include/psxpress.h
+++ b/libpsn00b/include/psxpress.h
@@ -1,6 +1,6 @@
/*
* PSn00bSDK MDEC library
- * (C) 2022 spicyjpeg - MPL licensed
+ * (C) 2022-2023 spicyjpeg - MPL licensed
*/
/**
@@ -17,11 +17,12 @@
* FMV playback is not part of this library per se, but can be implemented using
* the APIs defined here alongside some code to stream data from the CD drive.
*
- * Currently only version 1 and 2 .BS files are supported.
+ * Currently bitstream versions 1, 2 and 3 are supported. Version 0 and .IKI
+ * bitstreams are not supported, but no encoder is publicly available for those
+ * anyway.
*/
-#ifndef __PSXPRESS_H
-#define __PSXPRESS_H
+#pragma once
#include <stdint.h>
#include <stddef.h>
@@ -34,28 +35,26 @@ typedef struct _DECDCTENV {
int16_t dct[64]; // Inverse DCT matrix (2.14 fixed-point)
} DECDCTENV;
-// This is the "small" lookup table used by DecDCTvlc(). It can be copied to
-// the scratchpad.
+typedef struct _VLC_TableV2 {
+ uint16_t ac0[2];
+ uint32_t ac2[8], ac3[64];
+ uint16_t ac4[8], ac5[8], ac7[16], ac8[32];
+ uint16_t ac9[32], ac10[32], ac11[32], ac12[32];
+} VLC_TableV2;
+
+typedef struct _VLC_TableV3 {
+ uint16_t ac0[2];
+ uint32_t ac2[8], ac3[64];
+ uint16_t ac4[8], ac5[8], ac7[16], ac8[32];
+ uint16_t ac9[32], ac10[32], ac11[32], ac12[32];
+ uint8_t dc[128], dc_len[9];
+ uint8_t _reserved[3];
+} VLC_TableV3;
+
typedef struct _DECDCTTAB {
- uint16_t lut0[2];
- uint32_t lut2[8];
- uint32_t lut3[64];
- uint16_t lut4[8];
- uint16_t lut5[8];
- uint16_t lut7[16];
- uint16_t lut8[32];
- uint16_t lut9[32];
- uint16_t lut10[32];
- uint16_t lut11[32];
- uint16_t lut12[32];
+ uint32_t ac[8192], ac00[512];
} DECDCTTAB;
-// This is the "large" table used by DecDCTvlc2().
-typedef struct _DECDCTTAB2 {
- uint32_t lut[8192];
- uint32_t lut00[512];
-} DECDCTTAB2;
-
typedef enum _DECDCTMODE {
DECDCT_MODE_24BPP = 1,
DECDCT_MODE_16BPP = 0,
@@ -66,8 +65,9 @@ typedef enum _DECDCTMODE {
typedef struct _VLC_Context {
const uint32_t *input;
uint32_t window, next_window, remaining;
- uint16_t quant_scale;
int8_t is_v3, bit_offset, block_index, coeff_index;
+ uint16_t quant_scale;
+ int16_t last_y, last_cr, last_cb;
} VLC_Context;
// Despite what some docs claim, the "number of 32-byte blocks" and "always
@@ -233,8 +233,9 @@ int DecDCToutSync(int mode);
* frame) into a buffer that can be passed to DecDCTin(). This function uses a
* small (<1 KB) lookup table combined with the GTE to accelerate the process;
* performance is roughly on par with DecDCTvlcStart2() if the lookup table
- * is copied to the scratchpad beforehand by calling DecDCTvlcCopyTable(). The
- * contents of the GTE's LZCR register, if any, will be destroyed.
+ * is copied to the scratchpad beforehand by calling DecDCTvlcCopyTableV2() or
+ * DecDCTvlcCopyTableV3(). The contents of the GTE's LZCS and LZCR registers,
+ * if any, will be destroyed.
*
* A VLC_Context object must be created and passed to this function, which will
* then proceed to initialize its fields. The max_size argument sets the
@@ -244,8 +245,6 @@ int DecDCToutSync(int mode);
* can be different). If max_size = 0, the entire frame will always be decoded
* in one shot.
*
- * Only bitstream version 2 is currently supported.
- *
* WARNING: InitGeom() must be called prior to using DecDCTvlcStart() for the
* first time. Attempting to call this function with the GTE disabled will
* result in a crash.
@@ -256,7 +255,7 @@ int DecDCToutSync(int mode);
* @param bs
* @return 0, 1 if more data needs to be output or -1 in case of failure
*
- * @see DecDCTvlcContinue(), DecDCTvlcCopyTable()
+ * @see DecDCTvlcContinue(), DecDCTvlcCopyTableV2(), DecDCTvlcCopyTableV3()
*/
int DecDCTvlcStart(VLC_Context *ctx, uint32_t *buf, size_t max_size, const uint32_t *bs);
@@ -275,7 +274,8 @@ int DecDCTvlcStart(VLC_Context *ctx, uint32_t *buf, size_t max_size, const uint3
* context returned 0; in that case the context shall be discarded or reused to
* decode another bitstream.
*
- * The contents of the GTE's LZCR register, if any, will be destroyed.
+ * The contents of the GTE's LZCS and LZCR registers, if any, will be
+ * destroyed.
*
* See DecDCTvlcStart() for more details.
*
@@ -309,7 +309,7 @@ int DecDCTvlcContinue(VLC_Context *ctx, uint32_t *buf, size_t max_size);
* @param buf
* @return 0, 1 if more data needs to be output or -1 in case of failure
*
- * @see DecDCTvlcSize(), DecDCTvlcCopyTable()
+ * @see DecDCTvlcSize(), DecDCTvlcCopyTableV2(), DecDCTvlcCopyTableV3()
*/
int DecDCTvlc(const uint32_t *bs, uint32_t *buf);
@@ -332,23 +332,60 @@ int DecDCTvlc(const uint32_t *bs, uint32_t *buf);
size_t DecDCTvlcSize(size_t size);
/**
- * @brief Moves the lookup table used by the .BS decompressor to the scratchpad
- * region.
+ * @brief Copies the lookup tables used by the .BS decompressor (v1/v2) to the
+ * scratchpad region.
*
- * @details Copies the small (<1 KB) lookup table used by DecDCTvlcContinue(),
- * DecDCTvlcStart() and DecDCTvlc() (a DECDCTTAB structure) to the specified
- * address. A copy of this table is always present in main RAM, however this
- * function can be used to copy it to the scratchpad region to boost
- * decompression performance.
+ * @details Copies the lookup table used by DecDCTvlcContinue(),
+ * DecDCTvlcStart() and DecDCTvlc() to the specified address. A copy of this
+ * table is always present in main RAM, however this function can be used to
+ * copy it to the scratchpad region to boost decompression performance.
+ *
+ * This function copies a 676-byte table (VLC_TableV2 structure) containing
+ * only the data necessary for decoding version 1 and 2 bitstreams, to help
+ * save scratchpad space. If support for version 3 is required,
+ * DecDCTvlcCopyTableV3() can be used instead to copy the full 816-byte table.
*
* The address passed to this function is saved. Calls to DecDCTvlcStart(),
* DecDCTvlcContinue() and DecDCTvlc() will automatically use the last table
- * copied. Call DecDCTvlcCopyTable(0) to revert to using the library's internal
- * table in main RAM.
+ * copied. Call DecDCTvlcCopyTableV2(0) or DecDCTvlcCopyTableV3(0) to revert to
+ * using the library's internal table in main RAM.
+ *
+ * WARNING: attempting to decode a version 3 .BS file or .STR frame after
+ * calling this function will result in undefined behavior and potentially a
+ * crash. To re-enable version 3 decoding, use DecDCTvlcCopyTableV3() to copy
+ * the full table to the scratchpad or revert to using the built-in table in
+ * main RAM.
+ *
+ * @param addr Pointer to free 676-byte area in scratchpad region or 0 to reset
*
- * @param addr Pointer to free area in scratchpad region or 0 to reset
+ * @see DecDCTvlcCopyTableV3()
*/
-void DecDCTvlcCopyTable(DECDCTTAB *addr);
+void DecDCTvlcCopyTableV2(VLC_TableV2 *addr);
+
+/**
+ * @brief Copies the lookup tables used by the .BS decompressor (v1/v2/v3) to
+ * the scratchpad region.
+ *
+ * @details Copies the lookup table used by DecDCTvlcContinue(),
+ * DecDCTvlcStart() and DecDCTvlc() to the specified address. A copy of this
+ * table is always present in main RAM, however this function can be used to
+ * copy it to the scratchpad region to boost decompression performance.
+ *
+ * This function copies the full 816-byte table (VLC_TableV3 structure),
+ * including the data used to decode version 3 bitstreams. If support for
+ * version 3 is not required, DecDCTvlcCopyTableV2() can be used instead to
+ * save scratchpad space by only copying the first 676 bytes of the table.
+ *
+ * The address passed to this function is saved. Calls to DecDCTvlcStart(),
+ * DecDCTvlcContinue() and DecDCTvlc() will automatically use the last table
+ * copied. Call DecDCTvlcCopyTableV2(0) or DecDCTvlcCopyTableV3(0) to revert to
+ * using the library's internal table in main RAM.
+ *
+ * @param addr Pointer to free 816-byte area in scratchpad region or 0 to reset
+ *
+ * @see DecDCTvlcCopyTableV2()
+ */
+void DecDCTvlcCopyTableV3(VLC_TableV3 *addr);
/**
* @brief Decompresses or begins decompressing a .BS file into MDEC codes
@@ -360,8 +397,8 @@ void DecDCTvlcCopyTable(DECDCTTAB *addr);
* calling DecDCTvlcBuild(), but does not use the GTE nor the scratchpad.
* Depending on the specific bitstream being decoded DecDCTvlcStart2() might be
* slightly faster or slower than DecDCTvlcStart() with its lookup table copied
- * to the scratchpad (see DecDCTvlcCopyTable()). DecDCTvlcStart() with the
- * table in main RAM tends to be much slower.
+ * to the scratchpad (see DecDCTvlcCopyTableV2() and DecDCTvlcCopyTableV3()).
+ * DecDCTvlcStart() with the table in main RAM tends to be much slower.
*
* A VLC_Context object must be created and passed to this function, which will
* then proceed to initialize its fields. The max_size argument sets the
@@ -371,7 +408,8 @@ void DecDCTvlcCopyTable(DECDCTTAB *addr);
* buffer can be different). If max_size = 0, the entire frame will always be
* decoded in one shot.
*
- * Only bitstream version 2 is currently supported.
+ * This function only supports decoding version 1 and 2 bitstreams. Use
+ * DecDCTvlcStart() to decode a version 3 bitstream.
*
* @param ctx Pointer to VLC_Context structure (which will be initialized)
* @param buf
@@ -432,7 +470,7 @@ int DecDCTvlcContinue2(VLC_Context *ctx, uint32_t *buf, size_t max_size);
*
* @see DecDCTvlcSize2(), DecDCTvlcBuild()
*/
-int DecDCTvlc2(const uint32_t *bs, uint32_t *buf, DECDCTTAB2 *table);
+int DecDCTvlc2(const uint32_t *bs, uint32_t *buf, DECDCTTAB *table);
/**
* @brief Sets the maximum amount of data to be decompressed (alternate
@@ -458,7 +496,7 @@ size_t DecDCTvlcSize2(size_t size);
* the .BS decompressor.
*
* @details Generates the lookup table required by DecDCTvlcStart2(),
- * DecDCTvlcContinue2() and DecDCTvlc2() (a DECDCTTAB2 structure) into the
+ * DecDCTvlcContinue2() and DecDCTvlc2() (a DECDCTTAB structure) into the
* specified buffer. Since the table is relatively large (34 KB), it is
* recommended to only generate it in a dynamically-allocated buffer when
* needed and deallocate the buffer afterwards.
@@ -468,10 +506,8 @@ size_t DecDCTvlcSize2(size_t size);
*
* @param table
*/
-void DecDCTvlcBuild(DECDCTTAB2 *table);
+void DecDCTvlcBuild(DECDCTTAB *table);
#ifdef __cplusplus
}
#endif
-
-#endif
diff --git a/libpsn00b/include/psxsio.h b/libpsn00b/include/psxsio.h
index 449e43a..8932830 100644
--- a/libpsn00b/include/psxsio.h
+++ b/libpsn00b/include/psxsio.h
@@ -18,8 +18,7 @@
* debugging purposes.
*/
-#ifndef __PSXSIO_H
-#define __PSXSIO_H
+#pragma once
#include <stdint.h>
@@ -280,5 +279,3 @@ void DelSIO(void);
#ifdef __cplusplus
}
#endif
-
-#endif
diff --git a/libpsn00b/include/psxsn.h b/libpsn00b/include/psxsn.h
new file mode 100644
index 0000000..1acbc18
--- /dev/null
+++ b/libpsn00b/include/psxsn.h
@@ -0,0 +1,51 @@
+/*
+ * PSn00bSDK kernel API library (host file access)
+ * (C) 2023 spicyjpeg - MPL licensed
+ */
+
+/**
+ * @file psxsn.h
+ * @brief Host file access API header
+ *
+ * @details This header provides stubs for the PCDRV API, which grants read and
+ * write access to a directory on the host's filesystem when the executable is
+ * running on an emulator or through a debugger that supports the PCDRV
+ * protocol, such as Unirom or pcsx-redux. These functions are completely
+ * separate and independent from the BIOS file API and do not register any
+ * device drivers.
+ *
+ * Note that in the official SDK these functions are provided by libsn, while
+ * in PSn00bSDK they are part of libpsxapi.
+ */
+
+#pragma once
+
+#include <stddef.h>
+
+typedef enum _PCDRV_OpenMode {
+ PCDRV_MODE_READ = 0,
+ PCDRV_MODE_WRITE = 1,
+ PCDRV_MODE_READ_WRITE = 2
+} PCDRV_OpenMode;
+
+typedef enum _PCDRV_SeekMode {
+ PCDRV_SEEK_SET = 0,
+ PCDRV_SEEK_CUR = 1,
+ PCDRV_SEEK_END = 2
+} PCDRV_SeekMode;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int PCinit(void);
+int PCcreat(const char *path);
+int PCopen(const char *path, PCDRV_OpenMode mode);
+int PCclose(int fd);
+int PCread(int fd, void *data, size_t length);
+int PCwrite(int fd, const void *data, size_t length);
+int PClseek(int fd, int offset, PCDRV_SeekMode mode);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/libpsn00b/include/psxspu.h b/libpsn00b/include/psxspu.h
index cdc3ac7..b544952 100644
--- a/libpsn00b/include/psxspu.h
+++ b/libpsn00b/include/psxspu.h
@@ -1,10 +1,25 @@
/*
* PSn00bSDK SPU library
- * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed
+ * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed
*/
-#ifndef __PSXSPU_H
-#define __PSXSPU_H
+/**
+ * @file psxspu.h
+ * @brief SPU library header
+ *
+ * @details The PSn00bSDK SPU library allows for SPU initialization, DMA
+ * transfers (both sample data uploads and capture buffer reads) and provides
+ * helper macros for accessing SPU control registers, which can be used to
+ * control sample playback on each channel, configure reverb and enable more
+ * advanced features such as interrupts.
+ *
+ * This library currently has fewer functions than its Sony SDK counterpart, in
+ * part because it is not yet complete but also since the vast majority of the
+ * Sony library's functions are redundant, inefficient and can be replaced with
+ * simple SPU register writes.
+ */
+
+#pragma once
#include <stdint.h>
#include <stddef.h>
@@ -12,6 +27,7 @@
/* Definitions */
+#if 0
typedef enum _SPU_AttrMask {
SPU_VOICE_VOLL = 1 << 0, // Left volume
SPU_VOICE_VOLR = 1 << 1, // Right volume
@@ -33,6 +49,7 @@ typedef enum _SPU_AttrMask {
SPU_VOICE_ADSR_ADSR1 = 1 << 17,
SPU_VOICE_ADSR_ADSR2 = 1 << 18
} SPU_AttrMask;
+#endif
typedef enum _SPU_TransferMode {
SPU_TRANSFER_BY_DMA = 0,
@@ -46,6 +63,7 @@ typedef enum _SPU_WaitMode {
/* Structure definitions */
+#if 0
typedef struct _SpuVolume {
int16_t left, right;
} SpuVolume;
@@ -72,6 +90,7 @@ typedef struct _SpuCommonAttr {
SpuVolume mvol, mvolmode, mvolx;
SpuExtAttr cd, ext;
} SpuCommonAttr;
+#endif
/* Macros */
@@ -137,11 +156,11 @@ size_t SpuRead(uint32_t *data, size_t size);
size_t SpuWrite(const uint32_t *data, size_t size);
size_t SpuWritePartly(const uint32_t *data, size_t size);
SPU_TransferMode SpuSetTransferMode(SPU_TransferMode mode);
+SPU_TransferMode SpuGetTransferMode(void);
uint32_t SpuSetTransferStartAddr(uint32_t addr);
+uint32_t SpuGetTransferStartAddr(void);
int SpuIsTransferCompleted(int mode);
#ifdef __cplusplus
}
#endif
-
-#endif
diff --git a/libpsn00b/include/stdio.h b/libpsn00b/include/stdio.h
index 8aaf4c7..1bb5b74 100644
--- a/libpsn00b/include/stdio.h
+++ b/libpsn00b/include/stdio.h
@@ -1,39 +1,26 @@
-#ifndef _STDIO_H
-#define _STDIO_H
+/*
+ * PSn00bSDK standard library
+ * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed
+ */
-#include <stdarg.h>
+#pragma once
-// BIOS seek modes
-#ifndef SEEK_SET
-#define SEEK_SET 0
-#endif
-#ifndef SEEK_CUR
-#define SEEK_CUR 1
-#endif
-#ifndef SEEK_END
-#define SEEK_END 2 /* warning: reportedly buggy */
-#endif
+#include <stdarg.h>
#ifdef __cplusplus
extern "C" {
#endif
-// The following functions use the BIOS
-extern void printf (const char *__format, ...);
-
-extern int getc(int __fd);
-extern int putc(int __char, int __fd);
+/* String I/O API (provided by BIOS) */
-#define fputc(__char, __fd) putc(__char, __fd)
-#define fgetc(__char, __fd) getc(__char, __fd)
+int printf(const char *fmt, ...);
+char *gets(char *str);
+void puts(const char *str);
+int getchar(void);
+void putchar(int ch);
-// Console TTY
-extern void gets(char *__s);
-extern void puts(const char *__s);
-extern int getchar(void);
-extern void putchar(int __c);
+/* String formatting API (built-in) */
-// The following functions do not use the BIOS
int vsnprintf(char *string, unsigned int size, const char *fmt, va_list ap);
int vsprintf(char *string, const char *fmt, va_list ap);
int sprintf(char *string, const char *fmt, ...);
@@ -45,5 +32,3 @@ int sscanf(const char *str, const char *fmt, ...);
#ifdef __cplusplus
}
#endif
-
-#endif // _STDIO_H \ No newline at end of file
diff --git a/libpsn00b/include/stdlib.h b/libpsn00b/include/stdlib.h
index 049d067..19761df 100644
--- a/libpsn00b/include/stdlib.h
+++ b/libpsn00b/include/stdlib.h
@@ -1,10 +1,9 @@
/*
* PSn00bSDK standard library
- * (C) 2019-2022 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed
+ * (C) 2019-2023 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed
*/
-#ifndef __STDLIB_H
-#define __STDLIB_H
+#pragma once
#include <stddef.h>
@@ -39,11 +38,11 @@ void srand(int seed);
int abs(int j);
long labs(long i);
-long strtol(const char *nptr, char **endptr, int base);
-long long strtoll(const char *nptr, char **endptr, int base);
-float strtof(const char *nptr, char **endptr);
-double strtod(const char *nptr, char **endptr);
-long double strtold(const char *nptr, char **endptr);
+long strtol(const char *str, char **str_end, int base);
+long long strtoll(const char *str, char **str_end, int base);
+//float strtof(const char *str, char **str_end);
+//double strtod(const char *str, char **str_end);
+//long double strtold(const char *str, char **str_end);
void InitHeap(void *addr, size_t size);
void *sbrk(ptrdiff_t incr);
@@ -59,5 +58,3 @@ void free(void *ptr);
#ifdef __cplusplus
}
#endif
-
-#endif
diff --git a/libpsn00b/include/string.h b/libpsn00b/include/string.h
index ceee066..6310b1a 100644
--- a/libpsn00b/include/string.h
+++ b/libpsn00b/include/string.h
@@ -1,37 +1,40 @@
/*
* PSn00bSDK standard library
- * (C) 2019-2022 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed
+ * (C) 2019-2023 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed
*/
-#ifndef __STRING_H
-#define __STRING_H
+#pragma once
+
+#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
-int strcmp(const char *dst , const char *src);
-int strncmp(const char *dst , const char *src , int len);
-char *strpbrk(const char *dst , const char *src);
-char *strtok(char *s , char *set);
-char *strstr(const char *big , const char *little);
-
-char *strcat(char *s , const char *append);
-char *strncat(char *s , const char *append, int n);
-char *strcpy(char *dst , const char *src);
-char *strncpy(char *dst , const char *src , int n);
-int strlen(const char *s);
-char *strchr(const char *s , int c);
-char *strrchr(const char *s , int c);
-
-void *memmove(void *dst , const void *src , int n);
-void *memchr(void *s , int c , int n);
-void *memcpy(void *dst , const void *src , int n);
-void *memset(void *dst , char c , int n);
-int memcmp(const void *b1 , const void *b2 , int n);
+void *memset(void *dest, int ch, size_t count);
+void *memcpy(void *dest, const void *src, size_t count);
+void *memccpy(void *dest, const void *src, int ch, size_t count);
+void *memmove(void *dest, const void *src, size_t count);
+int memcmp(const void *lhs, const void *rhs, size_t count);
+void *memchr(const void *ptr, int ch, size_t count);
+
+char *strcpy(char *dest, const char *src);
+char *strncpy(char *dest, const char *src, size_t count);
+int strcmp(const char *lhs, const char *rhs);
+int strncmp(const char *lhs, const char *rhs, size_t count);
+char *strchr(const char *str, int ch);
+char *strrchr(const char *str, int ch);
+char *strpbrk(const char *str, const char *breakset);
+char *strstr(const char *str, const char *substr);
+
+size_t strlen(const char *str);
+char *strcat(char *dest, const char *src);
+char *strncat(char *dest, const char *src, size_t count);
+char *strdup(const char *str);
+char *strndup(const char *str, size_t count);
+
+char *strtok(char *str, const char *delim);
#ifdef __cplusplus
}
#endif
-
-#endif
diff --git a/libpsn00b/include/strings.h b/libpsn00b/include/strings.h
index 7223ab9..0595637 100644
--- a/libpsn00b/include/strings.h
+++ b/libpsn00b/include/strings.h
@@ -3,8 +3,7 @@
* (C) 2019-2022 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed
*/
-#ifndef __STRINGS_H
-#define __STRINGS_H
+#pragma once
#include <string.h>
@@ -15,5 +14,3 @@
#define bcmp(b1, b2, len) memcmp(b1, b2, len)
#define index(s, c) strchr(s, c)
#define rindex(s, c) strrchr(s, c)
-
-#endif
diff --git a/libpsn00b/include/sys/fcntl.h b/libpsn00b/include/sys/fcntl.h
index dfbf5b2..54c2d05 100644
--- a/libpsn00b/include/sys/fcntl.h
+++ b/libpsn00b/include/sys/fcntl.h
@@ -1,8 +1,10 @@
-#ifndef _SYS_FCNTL_H
-#define _SYS_FCNTL_H
+/*
+ * PSn00bSDK kernel API library
+ * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed
+ */
+
+#pragma once
-// File control mode flags for BIOS file functions
-// (many weren't documented in nocash docs)
#define FREAD 0x1 // Read
#define FWRITE 0x2 // Write
#define FNBLOCK 0x4 // Non-blocking read access
@@ -16,5 +18,3 @@
#define FNBUF 0x4000 // No ring buffer and terminal interrupt
#define FASYNC 0x8000 // Asynchronous I/O
#define FNBLOCKS(a) (a<<16) // Number of blocks? (from nocash docs)
-
-#endif \ No newline at end of file
diff --git a/libpsn00b/include/sys/ioctl.h b/libpsn00b/include/sys/ioctl.h
new file mode 100644
index 0000000..af65e5d
--- /dev/null
+++ b/libpsn00b/include/sys/ioctl.h
@@ -0,0 +1,13 @@
+/*
+ * PSn00bSDK kernel API library
+ * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed
+ */
+
+#pragma once
+
+#define EOF -1
+
+#define FIONBLOCK (('f'<<8)|1)
+#define FIOCSCAN (('f'<<8)|2)
+
+#define DIOFORMAT (('d'<<8)|1)
diff --git a/libpsn00b/include/sys/types.h b/libpsn00b/include/sys/types.h
index da43590..9075b5e 100644
--- a/libpsn00b/include/sys/types.h
+++ b/libpsn00b/include/sys/types.h
@@ -1,13 +1,13 @@
-#ifndef _TYPES_H
-#define _TYPES_H
+/*
+ * PSn00bSDK standard library
+ * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed
+ */
-//#warning "<sys/types.h> and u_* types are deprecated, include <stdint.h> instead"
+#pragma once
-//#include <stdint.h>
+//#warning "<sys/types.h> and u_* types are deprecated, use <stdint.h> instead"
typedef unsigned char u_char;
typedef unsigned short u_short;
typedef unsigned int u_int;
typedef unsigned long u_long;
-
-#endif // _TYPES_H \ No newline at end of file
diff --git a/libpsn00b/libc/memcmp.s b/libpsn00b/libc/memcmp.s
deleted file mode 100644
index ec1e729..0000000
--- a/libpsn00b/libc/memcmp.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# High speed ASM memcmp implementation by Lameguy64
-#
-# Part of PSn00bSDK
-
-.set noreorder
-
-.section .text
-
-# Arguments:
-# a0 - buffer 1 address
-# a1 - buffer 2 address
-# a2 - bytes to compare
-.global memcmp
-.type memcmp, @function
-memcmp:
- blez $a2, .Lexit
- addi $a2, -1
- lbu $v0, 0($a0)
- lbu $v1, 0($a1)
- addiu $a0, 1
- bne $v0, $v1, .Lmismatch
- addiu $a1, 1
- b memcmp
- nop
-.Lmismatch:
- jr $ra
- sub $v0, $v1
-.Lexit:
- jr $ra
- move $v0, $0
- \ No newline at end of file
diff --git a/libpsn00b/libc/memcpy.s b/libpsn00b/libc/memcpy.s
deleted file mode 100644
index 26edb37..0000000
--- a/libpsn00b/libc/memcpy.s
+++ /dev/null
@@ -1,28 +0,0 @@
-# High speed ASM memcpy implementation by Lameguy64
-#
-# Part of PSn00bSDK
-
-.set noreorder
-
-.section .text
-
-# Arguments:
-# a0 - destination address
-# a1 - source adress
-# a2 - bytes to copy
-.global memcpy
-.type memcpy, @function
-memcpy:
- move $v0, $a0
-.Lloop:
- blez $a2, .Lexit
- addi $a2, -1
- lbu $a3, 0($a1)
- addiu $a1, 1
- sb $a3, 0($a0)
- b .Lloop
- addiu $a0, 1
-.Lexit:
- jr $ra
- nop
- \ No newline at end of file
diff --git a/libpsn00b/libc/memmove.s b/libpsn00b/libc/memmove.s
deleted file mode 100644
index 843ece7..0000000
--- a/libpsn00b/libc/memmove.s
+++ /dev/null
@@ -1,42 +0,0 @@
-.set noreorder
-
-.section .text
-
-# Arguments
-# a0 - destination address
-# a1 - source address
-# a2 - bytes to move
-.global memmove
-.type memmove, @function
-memmove:
- move $v0, $a0
- sltu $v1, $a0, $a1
- blez $v1, .Linit_backward
-.Lloop_forward:
- blez $a2, .Lexit
- addi $a2, -1
- lbu $v1, 0($a1)
- addiu $a1, 1
- sb $v1, 0($a0)
- addiu $a0, 1
- b .Lloop_forward
- nop
-.Linit_backward:
- addu $a0, $a2
- addu $a1, $a2
- addiu $a0, -1
- addiu $a1, -1
- b .Lloop_backward
- nop
-.Lloop_backward:
- blez $a2, .Lexit
- addi $a2, -1
- lbu $v1, 0($a1)
- addiu $a1, -1
- sb $v1, 0($a0)
- addiu $a0, -1
- b .Lloop_backward
- nop
-.Lexit:
- jr $ra
- nop \ No newline at end of file
diff --git a/libpsn00b/libc/start.c b/libpsn00b/libc/start.c
index 9ff09c8..dcbad2d 100644
--- a/libpsn00b/libc/start.c
+++ b/libpsn00b/libc/start.c
@@ -11,11 +11,13 @@
#define KERNEL_ARG_STRING ((const char *) 0x80000180)
#define KERNEL_RETURN_VALUE ((volatile int *) 0x8000dffc)
-/* Argument parsing */
+/* BIOS argv parser (unused, interferes with child executable argv passing) */
int __argc;
const char **__argv;
+#if 0
+
#define ARGC_MAX 16
static const char *_argv_buffer[ARGC_MAX];
@@ -48,6 +50,8 @@ static void _parse_kernel_args(void) {
}
}
+#endif
+
/* Main */
// These are defined by the linker script. Note that these are *NOT* pointers,
@@ -66,11 +70,10 @@ extern int main(int argc, const char* argv[]);
// Even though _start() usually takes no arguments, this implementation allows
// parent executables to pass args directly to child executables without having
// to overwrite the arg strings in kernel RAM.
-void _start_inner(int32_t override_argc, const char **override_argv) {
+void _start_inner(int argc, const char **argv) {
//__asm__ volatile("la $gp, _gp;");
- // Clear BSS 4 bytes at a time. BSS is always aligned to 4 bytes by the
- // linker script.
+ // BSS is always aligned to 4 bytes by the linker script.
for (uint32_t *i = (uint32_t *) __bss_start; i < (uint32_t *) _end; i++)
*i = 0;
@@ -78,17 +81,14 @@ void _start_inner(int32_t override_argc, const char **override_argv) {
// RAM. Note that InitHeap() can be called again in main().
InitHeap((void *) _end + 4, (void *) 0x801ffff8 - (void *) _end);
- if (override_argv) {
- __argc = override_argc;
- __argv = override_argv;
- } else {
- _parse_kernel_args();
- }
+ //_parse_kernel_args();
+ __argc = argc;
+ __argv = argv;
// Call the global constructors (if any) to initialize global objects
// before calling main(). Constructors are put by the linker script in a
// length-prefixed array in reverse order.
- for (uint32_t i = (uint32_t) __CTOR_LIST__[0]; i >= 1; i--)
+ for (int i = (int) __CTOR_LIST__[0]; i >= 1; i--)
__CTOR_LIST__[i]();
// Store main()'s return value into the kernel return value area (for child
@@ -96,6 +96,6 @@ void _start_inner(int32_t override_argc, const char **override_argv) {
*KERNEL_RETURN_VALUE = main(__argc, __argv);
// Call global destructors (in forward order).
- for (uint32_t i = 0; i < (uint32_t) __DTOR_LIST__[0]; i++)
+ for (int i = 0; i < (int) __DTOR_LIST__[0]; i++)
__DTOR_LIST__[i + 1]();
}
diff --git a/libpsn00b/libc/string.c b/libpsn00b/libc/string.c
index a1a9a05..dbc2621 100644
--- a/libpsn00b/libc/string.c
+++ b/libpsn00b/libc/string.c
@@ -1,295 +1,457 @@
/*
- * string.c
- *
- * Inherited from PSXSDK C library
+ * PSn00bSDK standard library
+ * (C) 2019-2023 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed
*/
-#include <stdio.h>
-#include <string.h>
+#include <stdint.h>
+#include <stddef.h>
#include <stdlib.h>
+#include <string.h>
// Uncomment to enable strtod(), strtold() and strtof(). Note that these
// functions use extremely slow software floats.
//#define ALLOW_FLOAT
-int tolower(int chr)
-{
- return (chr >='A' && chr<='Z') ? (chr + 32) : (chr);
+/* Character manipulation */
+
+int isprint(int ch) {
+ return (ch >= ' ') && (ch <= '~');
}
-int toupper(int chr)
-{
- return (chr >='a' && chr<='z') ? (chr - 32) : (chr);
+int isgraph(int ch) {
+ return (ch > ' ') && (ch <= '~');
}
-// Need to be replaced with MIPS assembler equivalents
+int isspace(int ch) {
+ return (ch == ' ') || ((ch >= '\t') && (ch <= '\r'));
+}
-void *memchr(void *s , int c , int n)
-{
- while(n--)
- {
- if(*((unsigned char*)s) == (unsigned char)c)
- return s;
-
- s++;
- }
-
- return NULL;
+int isblank(int ch) {
+ return (ch == ' ') || (ch == '\t');
}
-char *strncpy(char *dst, const char *src, int len)
-{
- char *odst=dst;
+int isalpha(int ch) {
+ return ((ch >= 'A') && (ch <= 'Z')) || ((ch >= 'a') && (ch <= 'z'));
+}
- while(*src && len)
- {
- *(dst++) = *(src++);
- len--;
- }
-
- *dst = 0;
-
- return odst;
+int isdigit(int ch) {
+ return (ch >= '0') && (ch <= '9');
}
-char *strcpy(char *dst, const char *src)
-{
- char *odst = dst;
+int tolower(int ch) {
+ if ((ch >= 'A') && (ch <= 'Z'))
+ ch += 'a' - 'A';
- while(*(dst++) = *(src++));
- return odst;
+ return ch;
}
-char *strcat(char *dst, const char *src)
-{
- char *o=dst;
-
- while(*dst)
- dst++;
-
- strcpy(dst, src);
-
- return o;
+int toupper(int ch) {
+ if ((ch >= 'a') && (ch <= 'z'))
+ ch += 'A' - 'a';
+
+ return ch;
}
-char *strncat(char *s, const char *append, int len)
-{
- char *o=s;
-
- while(*s)
- s++;
-
- strncpy(s, append, len);
-
- return o;
+/* Memory buffer manipulation */
+
+// TODO: replace more of these with optimized assembly implementations
+
+/*void *memset(void *dest, int ch, size_t count) {
+ uint8_t *_dest = (uint8_t *) dest;
+
+ for (; count; count--)
+ *(_dest++) = (uint8_t) ch;
+
+ return dest;
+}*/
+
+void *memcpy(void *restrict dest, const void *restrict src, size_t count) {
+ uint8_t *_dest = (uint8_t *) dest;
+ const uint8_t *_src = (const uint8_t *) src;
+
+ for (; count; count--)
+ *(_dest++) = *(_src++);
+
+ return dest;
}
-int strlen(const char *str)
-{
- int i = 0;
- while(*(str++))i++;
- return i;
+void *memccpy(void *restrict dest, const void *restrict src, int ch, size_t count) {
+ uint8_t *_dest = (uint8_t *) dest;
+ const uint8_t *_src = (const uint8_t *) src;
+
+ for (; count; count--) {
+ uint8_t a = *(_src++);
+
+ *(_dest++) = a;
+ if (a == ch)
+ return (void *) _dest;
+ }
+
+ return 0;
}
-char *strchr(const char *s, int c)
-{
- int x;
+void *memmove(void *dest, const void *src, size_t count) {
+ uint8_t *_dest = (uint8_t *) dest;
+ const uint8_t *_src = (const uint8_t *) src;
- for(x = 0; x <= strlen(s); x++)
- if(s[x] == c) return (char*)&s[x];
+ if (_dest == _src)
+ return dest;
+ if ((_dest >= &_src[count]) || (&_dest[count] <= _src))
+ return memcpy(dest, src, count);
- return NULL;
+ if (_dest < _src) { // Copy forwards
+ for (; count; count--)
+ *(_dest++) = *(_src++);
+ } else { // Copy backwards
+ _src += count;
+ _dest += count;
+
+ for (; count; count--)
+ *(--_dest) = *(--_src);
+ }
+
+ return dest;
}
-char *strrchr(const char *s, int c)
-{
- int x;
+int memcmp(const void *lhs, const void *rhs, size_t count) {
+ const uint8_t *_lhs = (const uint8_t *) lhs;
+ const uint8_t *_rhs = (const uint8_t *) rhs;
+
+ for (; count; count--) {
+ uint8_t a = *(_lhs++), b = *(_rhs++);
- for(x = strlen(s); x>=0; x--)
- if(s[x] == c) return (char*)&s[x];
+ if (a != b)
+ return a - b;
+ }
- return NULL;
+ return 0;
}
-char *strpbrk(const char *s, const char *charset)
-{
- int x,y;
+void *memchr(const void *ptr, int ch, size_t count) {
+ const uint8_t *_ptr = (const uint8_t *) ptr;
- for(x = 0; x < strlen(s); x++)
- for(y = 0; y < strlen(charset); y++)
- if(s[x] == charset[y]) return (char*)&s[x];
+ for (; count; count--, _ptr++) {
+ if (*_ptr == ch)
+ return (void *) _ptr;
+ }
- return NULL;
+ return 0;
}
-char *strstr(const char *big, const char *little)
-{
- int ls = strlen(little);
- int bs = strlen(big);
- int x;
+/* String manipulation */
- if(ls == 0)
- return (char*)big;
-
- if(ls > bs)
- return NULL;
+char *strcpy(char *restrict dest, const char *restrict src) {
+ char *_dest = dest;
- for(x = 0; x <= bs-ls; x++)
- if(memcmp(little, &big[x], ls) == 0)
- return (char*)&big[x];
+ while (*src)
+ *(_dest++) = *(src++);
- return NULL;
+ *_dest = 0;
+ return dest;
}
-int strcmp(const char *s1, const char *s2)
-{
- while((*s1) && (*s2) && (*s1 == *s2))
- {
- s1++;
- s2++;
+char *strncpy(char *restrict dest, const char *restrict src, size_t count) {
+ char *_dest = dest;
+
+ for (; count && *src; count--)
+ *(_dest++) = *(src++);
+ for (; count; count--)
+ *(_dest++) = 0;
+
+ return dest;
+}
+
+int strcmp(const char *lhs, const char *rhs) {
+ for (;;) {
+ char a = *(lhs++), b = *(rhs++);
+
+ if (a != b)
+ return a - b;
+ if (!a && !b)
+ return 0;
+ }
+}
+
+int strncmp(const char *lhs, const char *rhs, size_t count) {
+ for (; count && *lhs && *rhs; count--) {
+ char a = *(lhs++), b = *(rhs++);
+
+ if (a != b)
+ return a - b;
}
- return(*s1-*s2);
+ return 0;
}
-int strncmp(const char *s1, const char *s2, int len)
-{
- int p = 0;
+char *strchr(const char *str, int ch) {
+ for (; *str; str++) {
+ if (*str == ch)
+ return (char *) str;
+ }
- while(*s1 && *s2 && (*s1 == *s2) && p<len)
- {
- p++;
+ return 0;
+}
+
+char *strrchr(const char *str, int ch) {
+ size_t length = strlen(str);
+
+ for (str += length; length; length--) {
+ str--;
+ if (*str == ch)
+ return (char *) str;
+ }
+
+ return 0;
+}
+
+char *strpbrk(const char *str, const char *breakset) {
+ for (; *str; str++) {
+ char a = *str;
- if(p<len)
- {
- s1++;
- s2++;
+ for (const char *ch = breakset; *ch; ch++) {
+ if (a == *ch)
+ return (char *) str;
}
}
- return *s1-*s2;
+ return 0;
}
-// Requires a malloc implementation
-char *strdup(const char *str)
-{
- char *ns = (void*)malloc(strlen(str) + 1);
+char *strstr(const char *str, const char *substr) {
+ size_t length = strlen(substr);
- if(ns == NULL)
- return NULL;
-
- strcpy(ns, str);
- return ns;
+ if (!length)
+ return (char *) str;
+
+ for (; *str; str++) {
+ if (!memcmp(str, substr, length))
+ return (char *) str;
+ }
+
+ return 0;
}
-char *strndup(const char *str, int len)
-{
- int n=strlen(str);
- char *ns = (void*)malloc((n+1)>len?len:(n+1));
+size_t strlen(const char *str) {
+ size_t length = 0;
- if(ns == NULL)
- return NULL;
-
- strncpy(ns, str, (n+1)>len?len:(n+1));
- return ns;
+ for (; *str; str++)
+ length++;
+
+ return length;
}
-
-long long strtoll(const char *nptr, char **endptr, int base)
-{
- int r = 0;
- int t = 0;
- int n = 0;
-
- if(*nptr == '-')
- {
- nptr++;
- n = 1;
+
+// Non-standard, used internally
+size_t strnlen(const char *str, size_t count) {
+ size_t length = 0;
+
+ for (; *str && (length < count); str++)
+ length++;
+
+ return length;
+}
+
+char *strcat(char *restrict dest, const char *restrict src) {
+ char *_dest = &dest[strlen(dest)];
+
+ while (*src)
+ *(_dest++) = *(src++);
+
+ *_dest = 0;
+ return dest;
+}
+
+char *strncat(char *restrict dest, const char *restrict src, size_t count) {
+ char *_dest = &dest[strlen(dest)];
+
+ for (; count && *src; count--)
+ *(_dest++) = *(src++);
+
+ *_dest = 0;
+ return dest;
+}
+
+char *strdup(const char *str) {
+ size_t length = strlen(str) + 1;
+ char *copy = malloc(length);
+
+ if (!copy)
+ return 0;
+
+ memcpy(copy, str, length);
+ return copy;
+}
+
+char *strndup(const char *str, size_t count) {
+ size_t length = strnlen(str, count) + 1;
+ char *copy = malloc(length);
+
+ if (!copy)
+ return 0;
+
+ memcpy(copy, str, length);
+ return copy;
+}
+
+/* String tokenizer */
+
+static char *_strtok_ptr = 0, *_strtok_end_ptr = 0;
+
+char *strtok(char *restrict str, const char *restrict delim) {
+ if (str) {
+ _strtok_ptr = str;
+ _strtok_end_ptr = &str[strlen(str)];
}
- if(base == 0)
- if(*nptr == '0')
- base = 8;
- else
- base = 10;
+ if (_strtok_ptr >= _strtok_end_ptr)
+ return 0;
+ if (!(*_strtok_ptr))
+ return 0;
+
+ char *split = strstr(_strtok_ptr, delim);
+ char *token = _strtok_ptr;
+
+ if (split) {
+ *(split++) = 0;
+ _strtok_ptr = split;
+ } else {
+ _strtok_ptr += strlen(token);
+ }
- if(!(base >= 2 && base <= 36))
+ return token;
+}
+
+/* Number parsers */
+
+long long strtoll(const char *restrict str, char **restrict str_end, int base) {
+ if (!str)
return 0;
- if(base == 16 && *nptr == '0')
- {
- if(*(nptr+1) == 'x' || *(nptr+1) == 'X')
- nptr+=2;
+ while (isspace(*str))
+ str++;
+
+ int negative = (*str == '-');
+ if (negative)
+ str++;
+
+ while (isspace(*str))
+ str++;
+
+ // Parse any base prefix if present. If a base was specified make sure it
+ // matches, otherwise use it to determine which base the value is in.
+ long long value = 0;
+
+ if (*str == '0') {
+ int _base;
+
+ switch (str[1]) {
+ case 0:
+ goto _exit_loop;
+
+ case 'X':
+ case 'x':
+ _base = 16;
+ str += 2;
+ break;
+
+ case 'O':
+ case 'o':
+ _base = 8;
+ str += 2;
+ break;
+
+ case 'B':
+ case 'b':
+ _base = 2;
+ str += 2;
+ break;
+
+ default:
+ // Numbers starting with a zero are *not* interpreted as octal
+ // unless base = 8.
+ _base = 0;
+ str++;
+ }
+
+ if (!base)
+ base = _base;
+ else if (base != _base)
+ return 0;
}
- while(*nptr)
- {
- switch(*nptr)
- {
- case '0'...'9':
- t = *nptr - '0';
- break;
- case 'a' ... 'z':
- t = (*nptr - 'a') + 10;
- break;
+ if (!base)
+ base = 10;
+ else if ((base < 2) || (base > 36))
+ return 0;
+
+ // Parse the actual value.
+ for (; *str; str++) {
+ char ch = *str;
+ int digit;
+
+ switch (ch) {
+ case '0' ... '9':
+ digit = ch - '0';
+ break;
+
case 'A' ... 'Z':
- t = (*nptr - 'A') + 10;
- break;
+ digit = (ch - 'A') + 10;
+ break;
+
+ case 'a' ... 'z':
+ digit = (ch - 'a') + 10;
+ break;
+
default:
- t = 1000;
- break;
+ goto _exit_loop;
}
- if(t>=base)
- break;
-
- r*=base;
- r+=t;
- nptr++;
+ value = (value * base) + digit;
}
- if(endptr)*endptr = (char*)nptr;
- return n?-r:r;
+_exit_loop:
+ if (str_end)
+ *str_end = (char *) str;
+
+ return negative ? (-value) : value;
}
-long strtol(const char *nptr, char **endptr, int base)
-{
- return (long)strtoll(nptr, endptr, base);
+long strtol(const char *restrict str, char **restrict str_end, int base) {
+ return (long) strtoll(str, str_end, base);
}
#ifdef ALLOW_FLOAT
-double strtod(const char *nptr, char **endptr)
-{
+double strtod(const char *restrict str, char **restrict str_end) {
char strbuf[64];
int x = 0;
int y;
double i=0, d=0;
int s=1;
- if(*nptr == '-')
+ if(*str == '-')
{
- nptr++;
+ str++;
s=-1;
}
- while(*nptr >= '0' && *nptr <= '9' && x < 18)
- strbuf[x++] = *(nptr++);
+ while(*str >= '0' && *str <= '9' && x < 18)
+ strbuf[x++] = *(str++);
strbuf[x] = 0;
i = (double)strtoll(strbuf, NULL, 10);
- if(*nptr == '.')
+ if(*str == '.')
{
- nptr++;
+ str++;
x = 0;
- while(*nptr >= '0' && *nptr <= '9' && x < 7)
- strbuf[x++] = *(nptr++);
+ while(*str >= '0' && *str <= '9' && x < 7)
+ strbuf[x++] = *(str++);
strbuf[x] = 0;
- if(endptr != NULL) *endptr = (char*)nptr;
+ if(str_end != NULL) *str_end = (char*)str;
y=1;
@@ -301,67 +463,19 @@ double strtod(const char *nptr, char **endptr)
}
else
{
- if(endptr != NULL)
- *endptr = (char*)nptr;
+ if(str_end != NULL)
+ *str_end = (char*)str;
}
return (i + d)*s;
}
-#endif
-
-/* implementation by Lameguy64, behaves like OpenWatcom's strtok() */
-/* BIOS strtok seemed either bugged, or designed for wide chars */
-
-static char *_strtok_curpos;
-static char *_strtok_endpos;
-
-char *strtok( char *s1, char *s2 )
-{
- char *c,*t;
-
- if( s1 )
- {
- _strtok_curpos = s1;
- _strtok_endpos = s1+strlen( s1 );
- }
- else
- {
- if( _strtok_curpos >= _strtok_endpos )
- return( NULL );
- }
-
- if( !*_strtok_curpos )
- return( NULL );
-
- if( c = strstr( _strtok_curpos, s2 ) )
- {
- *c = 0;
- t = _strtok_curpos;
- _strtok_curpos = c+1;
- return( t );
- }
- else
- {
- t = _strtok_curpos;
- _strtok_curpos += strlen( t );
- return( t );
- }
-
- return( NULL );
-
-} /* strtok */
-
-#ifdef ALLOW_FLOAT
-
-long double strtold(const char *nptr, char **endptr)
-{
- return (long double)strtod(nptr, endptr);
+long double strtold(const char *restrict str, char **restrict str_end) {
+ return (long double) strtod(str, str_end);
}
-float strtof(const char *nptr, char **endptr)
-{
- return (float)strtod(nptr, endptr);
+float strtof(const char *restrict str, char **restrict str_end) {
+ return (float) strtod(str, str_end);
}
#endif
diff --git a/libpsn00b/lzp/bit.h b/libpsn00b/lzp/bit.h
index 321160a..5e7ed23 100644
--- a/libpsn00b/lzp/bit.h
+++ b/libpsn00b/lzp/bit.h
@@ -1,5 +1,5 @@
-#ifndef _LZP_BIT_H
-#define _LZP_BIT_H
+
+#pragma once
extern const unsigned char* inPtr;
extern int inBytes;
@@ -21,6 +21,3 @@ int get_bits(int n);
#ifdef __cplusplus
}
#endif
-
-
-#endif // _LZP_BIT_H
diff --git a/libpsn00b/lzp/compress.c b/libpsn00b/lzp/compress.c
index 9cfc64d..16cb606 100644
--- a/libpsn00b/lzp/compress.c
+++ b/libpsn00b/lzp/compress.c
@@ -1,7 +1,7 @@
// Based on ilia muraviev's CRUSH compressor program which falls under public domain
#include <string.h>
-#if LZP_USE_MALLOC == TRUE
+#ifdef LZP_USE_MALLOC
#include <stdlib.h>
#endif
@@ -11,7 +11,7 @@
// Internal structure for hash table allocation sizes
-#if LZP_NO_COMPRESS == FALSE
+#ifndef LZP_NO_COMPRESS
struct {
short WindowSize; // Window size (17 - 23)
@@ -67,7 +67,7 @@ struct {
// LZ77
//
-#if LZP_NO_COMPRESS == FALSE
+#ifndef LZP_NO_COMPRESS
int update_hash1(int h, int c) {
@@ -108,13 +108,13 @@ int get_penalty(int a, int b) {
int lzCompress(void* outBuff, const void* inBuff, int inSize, int level) {
- #if LZP_USE_MALLOC == FALSE
+#ifndef LZP_USE_MALLOC
int head[HASH1_SIZE+HASH2_SIZE];
int prev[W_SIZE];
- #else
+#else
int* head = malloc(4*(HASH1_SIZE+HASH2_SIZE));
int* prev = malloc(4*W_SIZE);
- #endif
+#endif
int max_chain[] = {4, 256, 1<<12};
@@ -319,10 +319,10 @@ int lzCompress(void* outBuff, const void* inBuff, int inSize, int level) {
flush_bits();
- #if LZP_USE_MALLOC == TRUE
+#ifdef LZP_USE_MALLOC
free(head);
free(prev);
- #endif
+#endif
return(outBytes);
diff --git a/libpsn00b/include/lzconfig.h b/libpsn00b/lzp/lzconfig.h
index cb8a830..83579a3 100644
--- a/libpsn00b/include/lzconfig.h
+++ b/libpsn00b/lzp/lzconfig.h
@@ -3,29 +3,9 @@
* \details Define settings will only take effect when you recompile the library.
*/
-#ifndef _LZP_CONFIG_H
-#define _LZP_CONFIG_H
+#pragma once
-
-#ifndef TRUE
-#define TRUE 1
-#endif
-#ifndef FALSE
-#define FALSE 0
-#endif
-
-
-/* Set to TRUE to compile without data compression routines useful if you
- * plan to use this library on a program that does not require said routines
- * especially on a platform with limited memory (such as the PlayStation).
- *
- * This define will rule out lzCompress(), lzSetHashSizes() and
- * lzResetHashSizes() functions and their associated functions.
- */
-#define LZP_NO_COMPRESS TRUE
-
-
-/* Set to TRUE to make default compression table sizes to maximum and works best
+/* Uncomment to make default compression table sizes to maximum and works best
* when compressing large amounts of data. LZP_USE_MALLOC must be set to TRUE to
* prevent stack overflow errors.
*
@@ -34,21 +14,16 @@
*
* This define only affects lzCompress().
*/
-#define LZP_MAX_COMPRESS FALSE
-
+//#define LZP_MAX_COMPRESS
/* Uncomment to make the library use malloc() instead of array initializers to
* allocate hash tables. Enabling this is a must if you plan to use large hash
* and window table sizes.
*/
-#define LZP_USE_MALLOC FALSE
+//#define LZP_USE_MALLOC
-/* Hash table sizes (in power-of-two multiple units)
- *
- * These define only affect lzCompress().
- */
-#if LZP_MAX_COMPRESS == TRUE
+#if defined(PSN00BSDK) && !defined(LZP_MAX_COMPRESS)
// Minimal defaults
#define LZP_WINDOW_SIZE 17
@@ -57,12 +32,11 @@
#else
+#define LZP_USE_MALLOC
+
// Maximum defaults
#define LZP_WINDOW_SIZE 17
#define LZP_HASH1_SIZE 22
#define LZP_HASH2_SIZE 24
#endif
-
-
-#endif // _LZP_CONFIG_H
diff --git a/libpsn00b/lzp/lzp.h b/libpsn00b/lzp/lzp.h
index 456de02..1aeea30 100644
--- a/libpsn00b/lzp/lzp.h
+++ b/libpsn00b/lzp/lzp.h
@@ -1,20 +1,29 @@
-/*! \file lzp.h
- * \brief Main library header
+/*
+ * liblzp data compression library
+ * (C) 2019 Lameguy64 - MPL licensed
*/
-/*! \mainpage
- * \version 0.20b
- * \author John Wilbert 'Lameguy64' Villamor
+/**
+ * @file lzp.h
+ * @brief Utility library for file bundling and compression
*
- * \section creditsSection Credits
- * - LZ77 data compression/decompression routines based from Ilya Muravyov's
- * crush.cpp released under public domain. Refined and ported to C by Lameguy64.
- * - CRC calculation routines based from Lammert Bies' lib_crc routines.
+ * @details This library implements a simple in-memory archive format which
+ * can be used to package and compress assets for faster loading, as well as a
+ * generic LZ77 compressor and matching decompressor. Two archive formats are
+ * supported, one uncompressed (.QLP) and one with individually compressed
+ * entries (.LZP).
*
+ * This header provides the LZ77 compression API and functions to parse and
+ * decompress .LZP archives after they have been loaded into memory.
+ *
+ * @section creditsSection Credits
+ * - LZ77 data compression/decompression routines based on Ilya Muravyov's
+ * crush.cpp, released into the public domain. Refined and ported to C by
+ * Lameguy64.
+ * - CRC calculation routines based on Lammert Bies' lib_crc routines.
*/
-#ifndef _LZPACK_H
-#define _LZPACK_H
+#pragma once
#include <stdint.h>
#ifdef _WIN32
@@ -218,6 +227,3 @@ int lzpUnpackFile(void* buff, const LZP_HEAD* lzpack, int fileNum);
#ifdef __cplusplus
}
#endif
-
-
-#endif // _LZPACK_H
diff --git a/libpsn00b/lzp/lzqlp.h b/libpsn00b/lzp/lzqlp.h
index 32ce0d7..127f263 100644
--- a/libpsn00b/lzp/lzqlp.h
+++ b/libpsn00b/lzp/lzqlp.h
@@ -1,5 +1,23 @@
-#ifndef _QLP_H
-#define _QLP_H
+/*
+ * liblzp data compression library
+ * (C) 2019 Lameguy64 - MPL licensed
+ */
+
+/**
+ * @file lzqlp.h
+ * @brief Utility library for file bundling
+ *
+ * @details This library implements a simple in-memory archive format which
+ * can be used to package and compress assets for faster loading, as well as a
+ * generic LZ77 compressor and matching decompressor. Two archive formats are
+ * supported, one uncompressed (.QLP) and one with individually compressed
+ * entries (.LZP).
+ *
+ * This header provides functions to parse .QLP archives and retrieve pointers
+ * to their contents after they have been loaded into memory.
+ */
+
+#pragma once
#include <stdint.h>
#ifdef _WIN32
@@ -23,9 +41,17 @@ typedef struct {
uint32_t offs;
} QLP_FILE;
+
+// Function prototypes
+#ifdef __cplusplus
+extern "C" {
+#endif
+
int qlpFileCount(const QLP_HEAD* qlpfile);
const QLP_FILE* qlpFileEntry(int index, const QLP_HEAD* qlpfile);
const void* qlpFileAddr(int index, const QLP_HEAD* qlpfile);
int qlpFindFile(char* fileName, const QLP_HEAD* qlpfile);
-#endif // _QLP_H \ No newline at end of file
+#ifdef __cplusplus
+}
+#endif
diff --git a/libpsn00b/psxapi/_syscalls.s b/libpsn00b/psxapi/_syscalls.s
index 6eaed72..5062b15 100644
--- a/libpsn00b/psxapi/_syscalls.s
+++ b/libpsn00b/psxapi/_syscalls.s
@@ -1,26 +1,28 @@
# PSn00bSDK syscall wrappers
-# (C) 2022 spicyjpeg - MPL licensed
+# (C) 2022-2023 spicyjpeg - MPL licensed
.set noreorder
+## Interrupt enable/disable
+
.section .text.EnterCriticalSection
.global EnterCriticalSection
.type EnterCriticalSection, @function
EnterCriticalSection:
- li $a0, 0x01
+ li $a0, 0x01
syscall 0
- jr $ra
+ jr $ra
nop
.section .text.ExitCriticalSection
.global ExitCriticalSection
.type ExitCriticalSection, @function
ExitCriticalSection:
- li $a0, 0x02
+ li $a0, 0x02
syscall 0
- jr $ra
+ jr $ra
nop
.section .text.SwEnterCriticalSection
@@ -31,7 +33,7 @@ SwEnterCriticalSection:
li $a1, -1026
and $a1, $a0
mtc0 $a1, $12
- andi $a0, 0x0401 # return ((cop0r12_prev & 0x401) == 0x401)
+ andi $a0, 0x0401 # return !((cop0r12_prev & 0x401) < 0x401)
sltiu $v0, $a0, 0x0401
jr $ra
@@ -49,3 +51,100 @@ SwExitCriticalSection:
jr $ra
nop
+
+## PCDRV (host file access) API
+
+.section .text.PCinit
+.global PCinit
+.type PCinit, @function
+PCinit:
+ break 0, 0x101 # () -> error
+
+ jr $ra
+ nop
+
+.section .text.PCcreat
+.global PCcreat
+.type PCcreat, @function
+PCcreat:
+ li $a2, 0
+ move $a1, $a0
+ break 0, 0x102 # (path, path, 0) -> error, fd
+
+ bgez $v0, .Lcreate_ok # if (error < 0) fd = error
+ nop
+ move $v1, $v0
+.Lcreate_ok:
+ jr $ra # return fd
+ move $v0, $v1
+
+.section .text.PCopen
+.global PCopen
+.type PCopen, @function
+PCopen:
+ move $a2, $a1
+ move $a1, $a0
+ break 0, 0x103 # (path, path, mode) -> error, fd
+
+ bgez $v0, .Lopen_ok # if (error < 0) fd = error
+ nop
+ move $v1, $v0
+.Lopen_ok:
+ jr $ra # return fd
+ move $v0, $v1
+
+.section .text.PCclose
+.global PCclose
+.type PCclose, @function
+PCclose:
+ move $a1, $a0
+ break 0, 0x104 # (fd, fd) -> error
+
+ jr $ra
+ nop
+
+.section .text.PCread
+.global PCread
+.type PCread, @function
+PCread:
+ move $a3, $a1
+ move $a1, $a0
+ break 0, 0x105 # (fd, fd, length, data) -> error, length
+
+ bgez $v0, .Lread_ok # if (error < 0) length = error
+ nop
+ move $v1, $v0
+.Lread_ok:
+ jr $ra # return length
+ move $v0, $v1
+
+.section .text.PCwrite
+.global PCwrite
+.type PCwrite, @function
+PCwrite:
+ move $a3, $a1
+ move $a1, $a0
+ break 0, 0x106 # (fd, fd, length, data) -> error, length
+
+ bgez $v0, .Lwrite_ok # if (error < 0) length = error
+ nop
+ move $v1, $v0
+.Lwrite_ok:
+ jr $ra # return length
+ move $v0, $v1
+
+.section .text.PClseek
+.global PClseek
+.type PClseek, @function
+PClseek:
+ move $a3, $a2
+ move $a2, $a1
+ move $a1, $a0
+ break 0, 0x107 # (fd, fd, offset, mode) -> error, offset
+
+ bgez $v0, .Lseek_ok # if (error < 0) offset = error
+ nop
+ move $v1, $v0
+.Lseek_ok:
+ jr $ra # return offset
+ move $v0, $v1
diff --git a/libpsn00b/psxapi/drivers.s b/libpsn00b/psxapi/drivers.s
index d991f90..c601201 100644
--- a/libpsn00b/psxapi/drivers.s
+++ b/libpsn00b/psxapi/drivers.s
@@ -32,10 +32,10 @@ _96_remove:
jr $t2
li $t1, 0x72
-.section .text.AddDummyTty
-.global AddDummyTty
-.type AddDummyTty, @function
-AddDummyTty:
+.section .text.add_nullcon_driver
+.global add_nullcon_driver
+.type add_nullcon_driver, @function
+add_nullcon_driver:
li $t2, 0xa0
jr $t2
li $t1, 0x99
@@ -66,26 +66,26 @@ _card_clear:
## B0 table functions (12)
-.section .text.AddDev
-.global AddDev
-.type AddDev, @function
-AddDev:
+.section .text.AddDrv
+.global AddDrv
+.type AddDrv, @function
+AddDrv:
li $t2, 0xb0
jr $t2
li $t1, 0x47
-.section .text.DelDev
-.global DelDev
-.type DelDev, @function
-DelDev:
+.section .text.DelDrv
+.global DelDrv
+.type DelDrv, @function
+DelDrv:
li $t2, 0xb0
jr $t2
li $t1, 0x48
-.section .text.ListDev
-.global ListDev
-.type ListDev, @function
-ListDev:
+.section .text.ListDrv
+.global ListDrv
+.type ListDrv, @function
+ListDrv:
li $t2, 0xb0
jr $t2
li $t1, 0x49
diff --git a/libpsn00b/psxapi/fs.s b/libpsn00b/psxapi/fs.s
index f225d64..8b6d57a 100644
--- a/libpsn00b/psxapi/fs.s
+++ b/libpsn00b/psxapi/fs.s
@@ -6,12 +6,12 @@
.set noreorder
-## B0 table functions (5)
+## B0 table functions (6)
-.section .text.chdir
-.global chdir
-.type chdir, @function
-chdir:
+.section .text.cd
+.global cd
+.type cd, @function
+cd:
li $t2, 0xb0
jr $t2
li $t1, 0x40
@@ -48,3 +48,11 @@ erase:
jr $t2
li $t1, 0x45
+.section .text.undelete
+.global undelete
+.type undelete, @function
+undelete:
+ li $t2, 0xb0
+ jr $t2
+ li $t1, 0x46
+
diff --git a/libpsn00b/psxapi/stdio.s b/libpsn00b/psxapi/stdio.s
index e65f871..14c6d03 100644
--- a/libpsn00b/psxapi/stdio.s
+++ b/libpsn00b/psxapi/stdio.s
@@ -6,7 +6,7 @@
.set noreorder
-## A0 table functions (13)
+## A0 table functions (14)
.section .text.open
.global open
@@ -16,10 +16,10 @@ open:
jr $t2
li $t1, 0x00
-.section .text.seek
-.global seek
-.type seek, @function
-seek:
+.section .text.lseek
+.global lseek
+.type lseek, @function
+lseek:
li $t2, 0xa0
jr $t2
li $t1, 0x01
@@ -56,6 +56,14 @@ ioctl:
jr $t2
li $t1, 0x05
+.section .text.isatty
+.global isatty
+.type isatty, @function
+isatty:
+ li $t2, 0xa0
+ jr $t2
+ li $t1, 0x07
+
.section .text.getc
.global getc
.type getc, @function
@@ -112,3 +120,21 @@ printf:
jr $t2
li $t1, 0x3f
+## B0 table functions (2)
+
+.section .text._get_errno
+.global _get_errno
+.type _get_errno, @function
+_get_errno:
+ li $t2, 0xb0
+ jr $t2
+ li $t1, 0x54
+
+.section .text._get_error
+.global _get_error
+.type _get_error, @function
+_get_error:
+ li $t2, 0xb0
+ jr $t2
+ li $t1, 0x55
+
diff --git a/libpsn00b/psxapi/stubs.json b/libpsn00b/psxapi/stubs.json
index 50ffb55..afa83c6 100644
--- a/libpsn00b/psxapi/stubs.json
+++ b/libpsn00b/psxapi/stubs.json
@@ -8,7 +8,7 @@
{
"type": "a",
"id": 1,
- "name": "seek",
+ "name": "lseek",
"file": "stdio.s"
},
{
@@ -37,6 +37,12 @@
},
{
"type": "a",
+ "id": 7,
+ "name": "isatty",
+ "file": "stdio.s"
+ },
+ {
+ "type": "a",
"id": 8,
"name": "getc",
"file": "stdio.s"
@@ -109,6 +115,12 @@
},
{
"type": "a",
+ "id": 81,
+ "name": "LoadExec",
+ "file": "sys.s"
+ },
+ {
+ "type": "a",
"id": 85,
"name": "_bu_init",
"file": "drivers.s"
@@ -128,7 +140,7 @@
{
"type": "a",
"id": 153,
- "name": "AddDummyTty",
+ "name": "add_nullcon_driver",
"file": "drivers.s"
},
{
@@ -139,6 +151,18 @@
},
{
"type": "a",
+ "id": 157,
+ "name": "GetConf",
+ "file": "sys.s"
+ },
+ {
+ "type": "a",
+ "id": 159,
+ "name": "SetMem",
+ "file": "sys.s"
+ },
+ {
+ "type": "a",
"id": 160,
"name": "_boot",
"file": "sys.s"
@@ -170,13 +194,13 @@
{
"type": "b",
"id": 0,
- "name": "_kernel_malloc",
+ "name": "alloc_kernel_memory",
"file": "sys.s"
},
{
"type": "b",
"id": 1,
- "name": "_kernel_free",
+ "name": "free_kernel_memory",
"file": "sys.s"
},
{
@@ -296,13 +320,13 @@
{
"type": "b",
"id": 24,
- "name": "SetDefaultExitFromException",
+ "name": "ResetEntryInt",
"file": "sys.s"
},
{
"type": "b",
"id": 25,
- "name": "SetCustomExitFromException",
+ "name": "HookEntryInt",
"file": "sys.s"
},
{
@@ -314,7 +338,7 @@
{
"type": "b",
"id": 64,
- "name": "chdir",
+ "name": "cd",
"file": "fs.s"
},
{
@@ -343,20 +367,26 @@
},
{
"type": "b",
+ "id": 70,
+ "name": "undelete",
+ "file": "fs.s"
+ },
+ {
+ "type": "b",
"id": 71,
- "name": "AddDev",
+ "name": "AddDrv",
"file": "drivers.s"
},
{
"type": "b",
"id": 72,
- "name": "DelDev",
+ "name": "DelDrv",
"file": "drivers.s"
},
{
"type": "b",
"id": 73,
- "name": "ListDev",
+ "name": "ListDrv",
"file": "drivers.s"
},
{
@@ -397,6 +427,18 @@
},
{
"type": "b",
+ "id": 84,
+ "name": "_get_errno",
+ "file": "stdio.s"
+ },
+ {
+ "type": "b",
+ "id": 85,
+ "name": "_get_error",
+ "file": "stdio.s"
+ },
+ {
+ "type": "b",
"id": 86,
"name": "GetC0Table",
"file": "sys.s"
diff --git a/libpsn00b/psxapi/sys.s b/libpsn00b/psxapi/sys.s
index e2505e1..40dcdff 100644
--- a/libpsn00b/psxapi/sys.s
+++ b/libpsn00b/psxapi/sys.s
@@ -6,7 +6,7 @@
.set noreorder
-## A0 table functions (8)
+## A0 table functions (11)
.section .text.b_setjmp
.global b_setjmp
@@ -48,6 +48,14 @@ FlushCache:
jr $t2
li $t1, 0x44
+.section .text.LoadExec
+.global LoadExec
+.type LoadExec, @function
+LoadExec:
+ li $t2, 0xa0
+ jr $t2
+ li $t1, 0x51
+
.section .text.SetConf
.global SetConf
.type SetConf, @function
@@ -56,6 +64,22 @@ SetConf:
jr $t2
li $t1, 0x9c
+.section .text.GetConf
+.global GetConf
+.type GetConf, @function
+GetConf:
+ li $t2, 0xa0
+ jr $t2
+ li $t1, 0x9d
+
+.section .text.SetMem
+.global SetMem
+.type SetMem, @function
+SetMem:
+ li $t2, 0xa0
+ jr $t2
+ li $t1, 0x9f
+
.section .text._boot
.global _boot
.type _boot, @function
@@ -74,18 +98,18 @@ GetSystemInfo:
## B0 table functions (27)
-.section .text._kernel_malloc
-.global _kernel_malloc
-.type _kernel_malloc, @function
-_kernel_malloc:
+.section .text.alloc_kernel_memory
+.global alloc_kernel_memory
+.type alloc_kernel_memory, @function
+alloc_kernel_memory:
li $t2, 0xb0
jr $t2
li $t1, 0x00
-.section .text._kernel_free
-.global _kernel_free
-.type _kernel_free, @function
-_kernel_free:
+.section .text.free_kernel_memory
+.global free_kernel_memory
+.type free_kernel_memory, @function
+free_kernel_memory:
li $t2, 0xb0
jr $t2
li $t1, 0x01
@@ -242,18 +266,18 @@ ReturnFromException:
jr $t2
li $t1, 0x17
-.section .text.SetDefaultExitFromException
-.global SetDefaultExitFromException
-.type SetDefaultExitFromException, @function
-SetDefaultExitFromException:
+.section .text.ResetEntryInt
+.global ResetEntryInt
+.type ResetEntryInt, @function
+ResetEntryInt:
li $t2, 0xb0
jr $t2
li $t1, 0x18
-.section .text.SetCustomExitFromException
-.global SetCustomExitFromException
-.type SetCustomExitFromException, @function
-SetCustomExitFromException:
+.section .text.HookEntryInt
+.global HookEntryInt
+.type HookEntryInt, @function
+HookEntryInt:
li $t2, 0xb0
jr $t2
li $t1, 0x19
diff --git a/libpsn00b/psxcd/cdread.c b/libpsn00b/psxcd/cdread.c
index d211a01..1adc255 100644
--- a/libpsn00b/psxcd/cdread.c
+++ b/libpsn00b/psxcd/cdread.c
@@ -89,6 +89,8 @@ static int _poll_retry(void) {
/* Public API */
int CdReadRetry(int sectors, uint32_t *buf, int mode, int attempts) {
+ _sdk_validate_args((sectors > 0) && buf && (attempts > 0), -1);
+
if (CdReadSync(1, 0) > 0) {
_sdk_log("CdRead() failed, another read in progress (%d sectors pending)\n", _pending_sectors);
return 0;
diff --git a/libpsn00b/psxcd/common.c b/libpsn00b/psxcd/common.c
index 8b8030b..461ab91 100644
--- a/libpsn00b/psxcd/common.c
+++ b/libpsn00b/psxcd/common.c
@@ -208,6 +208,9 @@ int CdInit(void) {
BUS_CD_CFG = 0x00020943;
+ SetDMAPriority(DMA_CD, 3);
+ DMA_CHCR(DMA_CD) = 0x00000000; // Stop DMA
+
CD_REG(0) = 1;
CD_REG(3) = 0x1f; // Acknowledge all IRQs
CD_REG(2) = 0x1f; // Enable all IRQs
@@ -217,9 +220,6 @@ int CdInit(void) {
CdlATV mix = { 0x80, 0x00, 0x80, 0x00 };
CdMix(&mix);
- DMA_DPCR |= 0x0000b000; // Enable DMA3
- DMA_CHCR(DMA_CD) = 0x00000000; // Stop DMA3
-
_last_mode = 0;
_ack_pending = 0;
_sync_pending = 0;
@@ -244,6 +244,8 @@ int CdInit(void) {
/* Low-level command API */
int CdCommandF(CdlCommand cmd, const void *param, int length) {
+ _sdk_validate_args(param || (length <= 0), -1);
+
const uint8_t *_param = (const uint8_t *) param;
_last_command = (uint8_t) cmd;
@@ -283,7 +285,7 @@ int CdCommandF(CdlCommand cmd, const void *param, int length) {
__asm__ volatile("");
CD_REG(0) = 0;
- for (; length; length--)
+ for (; length > 0; length--)
CD_REG(2) = *(_param++);
CD_REG(0) = 0;
@@ -292,6 +294,8 @@ int CdCommandF(CdlCommand cmd, const void *param, int length) {
}
int CdCommand(CdlCommand cmd, const void *param, int length, uint8_t *result) {
+ _sdk_validate_args(param || (length <= 0), -1);
+
/*if (_ack_pending) {
_sdk_log("CdCommand(0x%02x) failed, drive busy\n", cmd);
return 0;
@@ -329,8 +333,10 @@ int CdControlF(CdlCommand cmd, const void *param) {
} else {
// The command takes a mandatory parameter or no parameter.
length = flags & 3;
- if (length && !param)
+ if (length && !param) {
+ _sdk_log("CdControl() param is required for command 0x%02x\n", cmd);
return -1;
+ }
}
return CdCommandF(cmd, param, length);
diff --git a/libpsn00b/psxcd/isofs.c b/libpsn00b/psxcd/isofs.c
index 0ac782b..31ed00c 100644
--- a/libpsn00b/psxcd/isofs.c
+++ b/libpsn00b/psxcd/isofs.c
@@ -92,7 +92,7 @@ static int _CdReadIsoDescriptor(int session_offs)
// Verify if volume descriptor is present
descriptor = (ISO_DESCRIPTOR*)_cd_iso_descriptor_buff;
- if( strncmp("CD001", descriptor->header.id, 5) )
+ if( memcmp("CD001", descriptor->header.id, 5) )
{
_sdk_log("Disc does not contain a ISO9660 file system.\n");
@@ -211,7 +211,7 @@ static int _CdReadIsoDirectory(int lba)
return 0;
}
-#ifndef NDEBUG
+#if 0
static void dump_directory(void)
{
@@ -228,8 +228,12 @@ static void dump_directory(void)
{
dir_entry = (ISO_DIR_ENTRY*)(_cd_iso_directory_buff+dir_pos);
- strncpy(namebuff,
- _cd_iso_directory_buff+dir_pos+sizeof(ISO_DIR_ENTRY), dir_entry->identifierLen);
+ memcpy(
+ namebuff,
+ _cd_iso_directory_buff+dir_pos+sizeof(ISO_DIR_ENTRY),
+ dir_entry->identifierLen
+ );
+ namebuff[dir_entry->identifierLen] = 0;
_sdk_log("P:%d L:%d %s\n", dir_pos, dir_entry->identifierLen, namebuff);
@@ -271,9 +275,12 @@ static void dump_pathtable(void)
while( (int)(tbl_pos-_cd_iso_pathtable_buff) <
descriptor->pathTableSize.lsb )
{
- strncpy(namebuff,
+ memcpy(
+ namebuff,
tbl_pos+sizeof(ISO_PATHTABLE_ENTRY),
- tbl_entry->nameLength);
+ tbl_entry->nameLength
+ );
+ namebuff[tbl_entry->nameLength] = 0;
_sdk_log("%s\n", namebuff);
@@ -308,9 +315,12 @@ static int get_pathtable_entry(int entry, ISO_PATHTABLE_ENTRY *tbl, char *namebu
{
if( namebuff )
{
- strncpy(namebuff,
+ memcpy(
+ namebuff,
tbl_pos+sizeof(ISO_PATHTABLE_ENTRY),
- tbl_entry->nameLength);
+ tbl_entry->nameLength
+ );
+ namebuff[tbl_entry->nameLength] = 0;
}
if( tbl )
@@ -381,9 +391,12 @@ static int find_dir_entry(const char *name, ISO_DIR_ENTRY *dirent)
if( !(dir_entry->flags & 0x2) )
{
- strncpy(namebuff,
+ memcpy(
+ namebuff,
_cd_iso_directory_buff+dir_pos+sizeof(ISO_DIR_ENTRY),
- dir_entry->identifierLen);
+ dir_entry->identifierLen
+ );
+ namebuff[dir_entry->identifierLen] = 0;
if( strcmp(namebuff, name) == 0 )
{
@@ -422,7 +435,8 @@ static char* get_pathname(char *path, const char *filename)
return NULL;
}
- strncpy(path, filename, (int)(c-filename));
+ memcpy(path, filename, c - filename);
+ path[c - filename] = 0;
return path;
}
@@ -450,6 +464,8 @@ static char* get_filename(char *name, const char *filename)
CdlFILE *CdSearchFile(CdlFILE *fp, const char *filename)
{
+ _sdk_validate_args(fp && filename, NULL);
+
int i,j,found_dir,num_dirs;
int dir_len;
char tpath_rbuff[128];
@@ -553,6 +569,8 @@ CdlFILE *CdSearchFile(CdlFILE *fp, const char *filename)
CdlDIR *CdOpenDir(const char* path)
{
+ _sdk_validate_args(path, NULL);
+
CdlDIR_INT* dir;
int num_dirs;
int i,found_dir;
@@ -605,7 +623,11 @@ CdlDIR *CdOpenDir(const char* path)
_sdk_log( "Directory LBA = %d\n", tbl_entry.dirOffs );
_CdReadIsoDirectory( tbl_entry.dirOffs );
-
+
+#ifndef NDEBUG
+ //dump_directory();
+#endif
+
dir = (CdlDIR_INT*)malloc( sizeof(CdlDIR_INT) );
dir->_len = _cd_iso_directory_len;
@@ -631,6 +653,8 @@ CdlDIR *CdOpenDir(const char* path)
int CdReadDir(CdlDIR *dir, CdlFILE* file)
{
+ _sdk_validate_args(dir && file, 0);
+
CdlDIR_INT* d_dir;
ISO_DIR_ENTRY* dir_entry;
@@ -658,9 +682,12 @@ int CdReadDir(CdlDIR *dir, CdlFILE* file)
}
else
{
- strncpy( file->name,
+ memcpy(
+ file->name,
d_dir->_dir+d_dir->_pos+sizeof(ISO_DIR_ENTRY),
- dir_entry->identifierLen );
+ dir_entry->identifierLen
+ );
+ file->name[dir_entry->identifierLen] = 0;
}
CdIntToPos( dir_entry->entryOffs.lsb, &file->pos );
@@ -683,6 +710,9 @@ int CdReadDir(CdlDIR *dir, CdlFILE* file)
void CdCloseDir(CdlDIR *dir)
{
+ if (!dir)
+ return;
+
CdlDIR_INT* d_dir;
d_dir = (CdlDIR_INT*)dir;
@@ -698,6 +728,8 @@ int CdIsoError()
int CdGetVolumeLabel(char *label)
{
+ _sdk_validate_args(label, -1);
+
int i, length = 31;
ISO_DESCRIPTOR* descriptor;
@@ -735,7 +767,7 @@ static void _scan_callback(CdlIntrResult status, unsigned char *result)
if( _ses_scanbuff[0] == 0x1 )
{
- if( strncmp((const char*)_ses_scanbuff+1, "CD001", 5) == 0 )
+ if( memcmp((const char*)_ses_scanbuff+1, "CD001", 5) == 0 )
{
CdControlF(CdlPause, 0);
_ses_scancomplete = 1;
@@ -761,6 +793,8 @@ static void _scan_callback(CdlIntrResult status, unsigned char *result)
int CdLoadSession(int session)
{
+ _sdk_validate_args(session >= 0, -1);
+
CdlLOC *loc;
CdlCB ready_oldcb;
char scanbuff[2048];
diff --git a/libpsn00b/psxcd/misc.c b/libpsn00b/psxcd/misc.c
index 8fd2a4d..2f04821 100644
--- a/libpsn00b/psxcd/misc.c
+++ b/libpsn00b/psxcd/misc.c
@@ -12,15 +12,29 @@
#define DATA_SYNC_TIMEOUT 0x100000
-/* Private types */
-
-typedef struct {
- uint8_t status, first_track, last_track;
-} TrackInfo;
+/* Unlock command strings */
+
+static const char *_unlock_strings[] = {
+ "",
+ "Licensed by",
+ "Sony",
+ "Computer",
+ "Entertainment",
+ "",
+ ""
+};
+
+static const char *const _unlock_regions[] = {
+ "of America", // CdlRegionSCEA
+ "(Europe)", // CdlRegionSCEE
+ "World wide" // CdlRegionSCEW
+};
/* Sector DMA transfer functions */
int CdGetSector(void *madr, int size) {
+ _sdk_validate_args(madr && (size > 0), 0);
+
//while (!(CD_REG(0) & (1 << 6)))
//__asm__ volatile("");
@@ -35,6 +49,8 @@ int CdGetSector(void *madr, int size) {
}
int CdGetSector2(void *madr, int size) {
+ _sdk_validate_args(madr && (size > 0), 0);
+
//while (!(CD_REG(0) & (1 << 6)))
//__asm__ volatile("");
@@ -54,7 +70,7 @@ int CdDataSync(int mode) {
return 0;
}
- _sdk_log("CdDataSync() timeout\n");
+ _sdk_log("CdDataSync() timeout, CHCR=0x%08x\n", DMA_CHCR(DMA_CD));
return -1;
}
@@ -77,52 +93,40 @@ int CdPosToInt(const CdlLOC *p) {
) - 150;
}
-/* Misc. functions */
-
-int CdGetToc(CdlLOC *toc) {
- TrackInfo track_info;
-
- if (!CdCommand(CdlGetTN, 0, 0, (uint8_t *) &track_info))
- return 0;
- if (CdSync(1, 0) != CdlComplete)
- return 0;
-
- int first = btoi(track_info.first_track);
- int tracks = btoi(track_info.last_track) + 1 - first;
- //assert(first == 1);
+/* Drive unlocking API */
- for (int i = 0; i < tracks; i++) {
- uint8_t track = itob(first + i);
+CdlRegionCode CdGetRegion(void) {
+ uint8_t param;
+ uint8_t result[16];
- if (!CdCommand(CdlGetTD, &track, 1, (uint8_t *) &toc[i]))
- return 0;
- if (CdSync(1, 0) != CdlComplete)
- return 0;
+ // Firmware version C0 does not support test command 0x22 to retrieve the
+ // region, but it was only used in the SCPH-1000 Japanese model. Version D1
+ // (and possibly others?) is used in debug consoles.
+ // https://psx-spx.consoledev.net/cdromdrive/#19h20h-int3yymmddver
+ // https://psx-spx.consoledev.net/cdromdrive/#19h22h-int3for-europe
+ param = 0x20;
+ memset(result, 0, 4);
- toc[i].sector = 0;
- toc[i].track = track;
+ if (!CdCommand(CdlTest, &param, 1, result)) {
+ _sdk_log("failed to probe drive firmware version\n");
+ return CdlRegionUnknown;
}
- return tracks;
-}
-
-CdlRegionCode CdGetRegion(void) {
- uint8_t param = 0x22;
- uint8_t result[16];
+ _sdk_log("drive firmware version: 0x%02x\n", result[3]);
+ if (result[3] == 0xc0)
+ return CdlRegionSCEI;
+ if (result[3] >= 0xd0)
+ return CdlRegionDebug;
- // Test command 0x22 is unsupported in firmware version C0, which was used
- // exclusively in the SCPH-1000 Japanese model. It's thus safe to assume
- // that the console is Japanese if the command returns a valid error.
- // https://psx-spx.consoledev.net/cdromdrive/#19h22h-int3for-europe
+ param = 0x22;
memset(result, 0, 16);
if (!CdCommand(CdlTest, &param, 1, result)) {
_sdk_log("failed to probe drive region\n");
- return (result[1] == 0x10) ? CdlRegionSCEI : CdlRegionUnknown;
+ return CdlRegionUnknown;
}
_sdk_log("drive region: %s\n", result);
-
if (!strcmp(result, "for Japan"))
return CdlRegionSCEI;
if (!strcmp(result, "for U/C"))
@@ -137,7 +141,72 @@ CdlRegionCode CdGetRegion(void) {
return CdlRegionUnknown;
}
+int CdUnlock(CdlRegionCode region) { // Sends the seven-step unlock command sequence for the given region; returns 0 on failure.
+ if (region <= CdlRegionSCEI)
+ return 0; // no unlock is attempted for region codes up to CdlRegionSCEI
+ if (region >= CdlRegionDebug)
+ return 1; // reported as success without sending any command
+
+ // Swap the region string into slot 5 of the shared table instead of building a per-call copy.
+ _unlock_strings[5] = _unlock_regions[region - CdlRegionSCEA];
+
+ for (int i = 0; i < 7; i++) { // one command per table entry: 0x50 through 0x56
+ uint8_t result[4];
+
+ if (!CdCommand(
+ 0x50 + i,
+ _unlock_strings[i],
+ strlen(_unlock_strings[i]),
+ result
+ ))
+ return 0;
+
+ if (!(result[0] & CdlStatError) || (result[1] != 0x40)) { // every step is expected to answer with the error flag set and code 0x40
+ _sdk_log("unlock failed, status=0x%02x, code=0x%02x\n", result[0], result[1]);
+ return 0;
+ }
+ }
+
+ _sdk_log("unlock successful\n");
+ return CdCommand(CdlNop, 0, 0, 0); // the caller sees the result of this final CdlNop
+}
+
+/* Misc. functions */
+
+int CdGetToc(CdlLOC *toc) {
+ _sdk_validate_args(toc, 0);
+
+ uint8_t result[4];
+
+ if (!CdCommand(CdlGetTN, 0, 0, result))
+ return 0;
+ if (CdSync(1, 0) != CdlComplete)
+ return 0;
+
+ int first = btoi(result[1]);
+ int tracks = btoi(result[2]) + 1 - first;
+ //assert(first == 1);
+
+ for (int i = 0; i < tracks; i++) {
+ uint8_t track = itob(first + i);
+
+ if (!CdCommand(CdlGetTD, &track, 1, result))
+ return 0;
+ if (CdSync(1, 0) != CdlComplete)
+ return 0;
+
+ toc[i].minute = result[1];
+ toc[i].second = result[2];
+ toc[i].sector = 0;
+ toc[i].track = track;
+ }
+
+ return tracks;
+}
+
int CdMix(const CdlATV *vol) {
+ _sdk_validate_args(vol, 0);
+
CD_REG(0) = 2;
CD_REG(2) = vol->val0;
CD_REG(3) = vol->val1;
diff --git a/libpsn00b/psxetc/dl.c b/libpsn00b/psxetc/dl.c
index ff712eb..06302e2 100644
--- a/libpsn00b/psxetc/dl.c
+++ b/libpsn00b/psxetc/dl.c
@@ -112,6 +112,8 @@ static uint32_t _elf_hash(const char *str) {
/* Symbol map loading/introspection API */
int DL_InitSymbolMap(int num_entries) {
+ _sdk_validate_args(num_entries, -1);
+
if (_symbol_map.entries)
DL_UnloadSymbolMap();
@@ -151,6 +153,8 @@ void DL_UnloadSymbolMap(void) {
}
void DL_AddMapSymbol(const char *name, void *ptr) {
+ _sdk_validate_args_void(name);
+
uint32_t hash = _elf_hash(name);
int index = _symbol_map.index;
_symbol_map.index = index + 1;
@@ -168,6 +172,8 @@ void DL_AddMapSymbol(const char *name, void *ptr) {
}
int DL_ParseSymbolMap(const char *ptr, size_t size) {
+ _sdk_validate_args(ptr && size, 0);
+
int entries = 0;
// Perform a quick scan over the entire map text and count the number of
@@ -232,6 +238,8 @@ int DL_ParseSymbolMap(const char *ptr, size_t size) {
}
void *DL_GetMapSymbol(const char *name) {
+ _sdk_validate_args(name, 0);
+
if (!_symbol_map.entries) {
_sdk_log("DL_GetMapSymbol() with no map loaded\n");
return 0;
@@ -275,8 +283,7 @@ void *DL_SetResolveCallback(void *(*callback)(DLL *, const char *)) {
/* Library loading and linking API */
DLL *DL_CreateDLL(DLL *dll, void *ptr, size_t size, DL_ResolveMode mode) {
- if (!dll || !ptr)
- return 0;
+ _sdk_validate_args(dll && ptr && size, 0);
dll->ptr = ptr;
dll->malloc_ptr = (mode & DL_FREE_ON_DESTROY) ? ptr : 0;
@@ -463,6 +470,8 @@ void DL_DestroyDLL(DLL *dll) {
}
void *DL_GetDLLSymbol(const DLL *dll, const char *name) {
+ _sdk_validate_args(name, 0);
+
if (!dll)
return DL_GetMapSymbol(name);
//return _dl_resolve_callback(0, name);
diff --git a/libpsn00b/psxetc/interrupts.c b/libpsn00b/psxetc/interrupts.c
index f2a273c..8bd11fc 100644
--- a/libpsn00b/psxetc/interrupts.c
+++ b/libpsn00b/psxetc/interrupts.c
@@ -4,6 +4,7 @@
*/
#include <stdint.h>
+#include <assert.h>
#include <psxapi.h>
#include <psxetc.h>
#include <hwregs_c.h>
@@ -99,8 +100,7 @@ static void _global_dma_handler(void) {
/* IRQ and DMA handler API */
void *InterruptCallback(IRQ_Channel irq, void (*func)(void)) {
- if ((irq < 0) || (irq >= NUM_IRQ_CHANNELS))
- return 0;
+ _sdk_validate_args((irq >= 0) && (irq < NUM_IRQ_CHANNELS), 0);
void *old_callback = _irq_handlers[irq];
_irq_handlers[irq] = func;
@@ -116,15 +116,13 @@ void *InterruptCallback(IRQ_Channel irq, void (*func)(void)) {
}
void *GetInterruptCallback(IRQ_Channel irq) {
- if ((irq < 0) || (irq >= NUM_IRQ_CHANNELS))
- return 0;
+ _sdk_validate_args((irq >= 0) && (irq < NUM_IRQ_CHANNELS), 0);
return _irq_handlers[irq];
}
void *DMACallback(DMA_Channel dma, void (*func)(void)) {
- if ((dma < 0) || (dma >= NUM_DMA_CHANNELS))
- return 0;
+ _sdk_validate_args((dma >= 0) && (dma < NUM_DMA_CHANNELS), 0);
void *old_callback = _dma_handlers[dma];
_dma_handlers[dma] = func;
@@ -151,12 +149,34 @@ void *DMACallback(DMA_Channel dma, void (*func)(void)) {
}
void *GetDMACallback(DMA_Channel dma) {
- if ((dma < 0) || (dma >= NUM_DMA_CHANNELS))
- return 0;
+ _sdk_validate_args((dma >= 0) && (dma < NUM_DMA_CHANNELS), 0);
return _dma_handlers[dma];
}
+/* DMA channel priority API */
+
+int SetDMAPriority(DMA_Channel dma, int priority) { // Rewrites a channel's DPCR field; returns its previous priority, or -1 if it was disabled.
+ _sdk_validate_args((dma >= 0) && (dma < NUM_DMA_CHANNELS), -1); // presumably returns -1 for an out-of-range channel; macro is defined elsewhere
+
+ uint32_t dpcr = DMA_DPCR;
+ uint32_t channel = dpcr >> (dma * 4); // this channel's previous 4-bit field, shifted into the low bits
+
+ dpcr &= ~(0xf << (dma * 4)); // clear the field, which also leaves the channel disabled
+ if (priority >= 0) // a negative priority means "disable only"
+ dpcr |= ((priority & 7) | 8) << (dma * 4); // bits 0-2 = priority, bit 3 = enable
+
+ DMA_DPCR = dpcr;
+ return (channel & 8) ? (channel & 7) : -1; // reports the state read before the update
+}
+
+int GetDMAPriority(DMA_Channel dma) { // Returns the channel's current priority, or -1 if the channel is disabled.
+ _sdk_validate_args((dma >= 0) && (dma < NUM_DMA_CHANNELS), -1); // presumably returns -1 for an out-of-range channel; macro is defined elsewhere
+
+ uint32_t channel = DMA_DPCR >> (dma * 4); // this channel's 4-bit DPCR field, shifted into the low bits
+ return (channel & 8) ? (channel & 7) : -1; // bit 3 = enable, bits 0-2 = priority
+}
+
/* Hook installation/removal API */
int ResetCallback(void) {
@@ -190,7 +210,7 @@ void RestartCallback(void) {
// Install the ISR hook and prevent the kernel's internal handlers from
// automatically acknowledging SPI and timer IRQs.
- SetCustomExitFromException(&_isr_jmp_buf);
+ HookEntryInt(&_isr_jmp_buf);
ChangeClearPAD(0);
ChangeClearRCnt(0, 0);
ChangeClearRCnt(1, 0);
@@ -217,7 +237,7 @@ void StopCallback(void) {
DMA_DPCR = _saved_dma_dpcr & 0x07777777;
DMA_DICR = 0;
- SetDefaultExitFromException();
+ ResetEntryInt();
ChangeClearPAD(1);
ChangeClearRCnt(0, 1);
ChangeClearRCnt(1, 1);
diff --git a/libpsn00b/psxgpu/common.c b/libpsn00b/psxgpu/common.c
index e41bd31..7e0758b 100644
--- a/libpsn00b/psxgpu/common.c
+++ b/libpsn00b/psxgpu/common.c
@@ -1,6 +1,6 @@
/*
* PSn00bSDK GPU library (common functions)
- * (C) 2022 spicyjpeg - MPL licensed
+ * (C) 2022-2023 spicyjpeg - MPL licensed
*/
#include <stdint.h>
@@ -10,9 +10,8 @@
#include <psxgpu.h>
#include <hwregs_c.h>
-#define QUEUE_LENGTH 16
-#define DMA_CHUNK_LENGTH 8
-#define VSYNC_TIMEOUT 0x100000
+#define QUEUE_LENGTH 16
+#define VSYNC_TIMEOUT 0x100000
static void _default_vsync_halt(void);
@@ -21,7 +20,7 @@ static void _default_vsync_halt(void);
typedef struct {
void (*func)(uint32_t, uint32_t, uint32_t);
uint32_t arg1, arg2, arg3;
-} QueueEntry;
+} DrawOp;
/* Internal globals */
@@ -31,10 +30,10 @@ static void (*_vsync_halt_func)(void) = &_default_vsync_halt;
static void (*_vsync_callback)(void) = (void *) 0;
static void (*_drawsync_callback)(void) = (void *) 0;
-static volatile QueueEntry _draw_queue[QUEUE_LENGTH];
-static volatile uint8_t _queue_head, _queue_tail, _queue_length;
-static volatile uint32_t _vblank_counter;
-static volatile uint16_t _last_hblank;
+static volatile DrawOp _draw_queue[QUEUE_LENGTH];
+static volatile uint8_t _queue_head, _queue_tail, _queue_length, _drawop_type;
+static volatile uint32_t _vblank_counter, _last_vblank;
+static volatile uint16_t _last_hblank;
/* Private interrupt handlers */
@@ -45,16 +44,16 @@ static void _vblank_handler(void) {
_vsync_callback();
}
-static void _gpu_dma_handler(void) {
- //while (!(GPU_GP1 & (1 << 26)) || (DMA_CHCR(2) & (1 << 24)))
- while (!(GPU_GP1 & (1 << 26)))
- __asm__ volatile("");
+static void _process_drawop(void) {
+ int length = _queue_length;
+ if (!length)
+ return;
- if (--_queue_length) {
+ if (--length) {
int head = _queue_head;
_queue_head = (head + 1) % QUEUE_LENGTH;
- volatile QueueEntry *entry = &_draw_queue[head];
+ volatile DrawOp *entry = &_draw_queue[head];
entry->func(entry->arg1, entry->arg2, entry->arg3);
} else {
GPU_GP1 = 0x04000000; // Disable DMA request
@@ -62,16 +61,36 @@ static void _gpu_dma_handler(void) {
if (_drawsync_callback)
_drawsync_callback();
}
+
+ _queue_length = length;
+}
+
+static void _gpu_irq_handler(void) {
+ GPU_GP1 = 0x02000000; // Reset IRQ
+
+ if (_drawop_type == DRAWOP_TYPE_GPU_IRQ)
+ _process_drawop();
+}
+
+static void _gpu_dma_handler(void) {
+ if (_drawop_type == DRAWOP_TYPE_DMA)
+ _process_drawop();
}
/* GPU reset and system initialization */
void ResetGraph(int mode) {
+ _queue_head = 0;
+ _queue_tail = 0;
+ _queue_length = 0;
+ _drawop_type = 0;
+
// Perform some basic system initialization when ResetGraph() is called for
// the first time.
if (!ResetCallback()) {
EnterCriticalSection();
InterruptCallback(IRQ_VBLANK, &_vblank_handler);
+ InterruptCallback(IRQ_GPU, &_gpu_irq_handler);
DMACallback(DMA_GPU, &_gpu_dma_handler);
_gpu_video_mode = (GPU_GP1 >> 20) & 1;
@@ -80,28 +99,27 @@ void ResetGraph(int mode) {
_sdk_log("setup done, default mode is %s\n", _gpu_video_mode ? "PAL" : "NTSC");
}
- if (mode == 3) {
+ if (mode) {
GPU_GP1 = 0x01000000; // Reset command buffer
- return;
- }
-
- DMA_DPCR |= 0x0b000b00; // Enable DMA2 and DMA6
- DMA_CHCR(2) = 0x00000201; // Stop DMA2
- DMA_CHCR(6) = 0x00000200; // Stop DMA6
+ GPU_GP1 = 0x02000000; // Reset IRQ
+ GPU_GP1 = 0x04000000; // Disable DMA request
- if (mode == 1) {
- GPU_GP1 = 0x01000000; // Reset command buffer
- return;
+ if (mode == 1)
+ return;
+ } else {
+ GPU_GP1 = 0x00000000; // Reset GPU
}
- GPU_GP1 = 0x00000000; // Reset GPU
+ SetDMAPriority(DMA_GPU, 3);
+ SetDMAPriority(DMA_OTC, 3);
+ DMA_CHCR(DMA_GPU) = 0x00000201; // Stop DMA
+ DMA_CHCR(DMA_OTC) = 0x00000200; // Stop DMA
+
TIMER_CTRL(0) = 0x0500;
TIMER_CTRL(1) = 0x0500;
- _queue_head = 0;
- _queue_tail = 0;
- _queue_length = 0;
_vblank_counter = 0;
+ _last_vblank = 0;
_last_hblank = 0;
}
@@ -127,10 +145,13 @@ int VSync(int mode) {
if (mode < 0)
return _vblank_counter;
- uint32_t status = GPU_GP1;
+ // Wait for the specified number of vertical blank events since the last
+ // call to VSync() to occur (if mode >= 2) or just for a single vertical
+ // blank (if mode = 0).
+ uint32_t target = mode ? (_last_vblank + mode) : (_vblank_counter + 1);
- // Wait for at least one vertical blank event to occur.
- do {
+ while (_vblank_counter < target) {
+ uint32_t status = GPU_GP1;
_vsync_halt_func();
// If interlaced mode is enabled, wait until the GPU starts displaying
@@ -139,9 +160,11 @@ int VSync(int mode) {
while (!((GPU_GP1 ^ status) & (1 << 31)))
__asm__ volatile("");
}
- } while ((--mode) > 0);
+ }
+ _last_vblank = _vblank_counter;
_last_hblank = TIMER_VALUE(1);
+
return delta;
}
@@ -167,14 +190,13 @@ void *VSyncCallback(void (*func)(void)) {
/* Command queue API */
-// This function is normally only used internally, but it is exposed for
-// advanced use cases.
-int EnqueueDrawOp(
- void (*func)(uint32_t, uint32_t, uint32_t),
- uint32_t arg1,
- uint32_t arg2,
- uint32_t arg3
-) {
+void SetDrawOpType(GPU_DrawOpType type) {
+ _drawop_type = type;
+}
+
+int EnqueueDrawOp(void (*func)(), uint32_t arg1, uint32_t arg2, uint32_t arg3) {
+ _sdk_validate_args(func, -1);
+
// If GPU DMA is currently busy, append the command to the queue instead of
// executing it immediately. Note that interrupts must be disabled *prior*
// to checking if DMA is busy; disabling them afterwards would create a
@@ -202,7 +224,7 @@ int EnqueueDrawOp(
_queue_tail = (tail + 1) % QUEUE_LENGTH;
_queue_length = length + 1;
- volatile QueueEntry *entry = &_draw_queue[tail];
+ volatile DrawOp *entry = &_draw_queue[tail];
entry->func = func;
entry->arg1 = arg1;
entry->arg2 = arg2;
@@ -225,7 +247,7 @@ int DrawSync(int mode) {
if (!_queue_length) {
// Wait for any DMA transfer to finish if DMA is enabled.
if (GPU_GP1 & (3 << 29)) {
- while (!(GPU_GP1 & (1 << 28)) || (DMA_CHCR(2) & (1 << 24)))
+ while (!(GPU_GP1 & (1 << 28)) || (DMA_CHCR(DMA_GPU) & (1 << 24)))
__asm__ volatile("");
}
@@ -248,88 +270,17 @@ void *DrawSyncCallback(void (*func)(void)) {
return old_callback;
}
-/* OT and primitive drawing API */
-
-void ClearOTagR(uint32_t *ot, size_t length) {
- DMA_MADR(6) = (uint32_t) &ot[length - 1];
- DMA_BCR(6) = length & 0xffff;
- DMA_CHCR(6) = 0x11000002;
+/* Queue pause/resume API */
- while (DMA_CHCR(6) & (1 << 24))
- __asm__ volatile("");
-}
+int IsIdleGPU(int timeout) {
+ if (timeout <= 0)
+ timeout = 1;
-void ClearOTag(uint32_t *ot, size_t length) {
- // DMA6 only supports writing to RAM in reverse order (last to first), so
- // the OT has to be cleared in software here. This function is thus much
- // slower than ClearOTagR().
- // https://problemkaputt.de/psx-spx.htm#dmachannels
- for (int i = 0; i < (length - 1); i++)
- ot[i] = (uint32_t) &ot[i + 1] & 0x00ffffff;
-
- ot[length - 1] = 0x00ffffff;
-}
-
-void AddPrim(uint32_t *ot, const void *pri) {
- addPrim(ot, pri);
-}
-
-void DrawPrim(const uint32_t *pri) {
- size_t length = getlen(pri);
-
- DrawSync(0);
- GPU_GP1 = 0x04000002;
-
- // NOTE: if length >= DMA_CHUNK_LENGTH then it also has to be a multiple of
- // DMA_CHUNK_LENGTH, otherwise the DMA channel will get stuck waiting for
- // more data indefinitely.
- DMA_MADR(2) = (uint32_t) &pri[1];
- if (length < DMA_CHUNK_LENGTH)
- DMA_BCR(2) = 0x00010000 | length;
- else
- DMA_BCR(2) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16);
-
- DMA_CHCR(2) = 0x01000201;
-}
-
-int DrawOTag(const uint32_t *ot) {
- return EnqueueDrawOp((void *) &DrawOTag2, (uint32_t) ot, 0, 0);
-}
-
-void DrawOTag2(const uint32_t *ot) {
- GPU_GP1 = 0x04000002;
-
- while (!(GPU_GP1 & (1 << 26)) || (DMA_CHCR(2) & (1 << 24)))
- __asm__ volatile("");
-
- DMA_MADR(2) = (uint32_t) ot;
- DMA_BCR(2) = 0;
- DMA_CHCR(2) = 0x01000401;
-}
-
-/* Misc. functions */
-
-GPU_VideoMode GetVideoMode(void) {
- return _gpu_video_mode;
-}
-
-void SetVideoMode(GPU_VideoMode mode) {
- uint32_t _mode, stat = GPU_GP1;
-
- _gpu_video_mode = mode & 1;
-
- _mode = (mode & 1) << 3;
- _mode |= (stat >> 17) & 0x37; // GPUSTAT 17-22 -> cmd bits 0-5
- _mode |= (stat >> 10) & 0x40; // GPUSTAT bit 16 -> cmd bit 6
- _mode |= (stat >> 7) & 0x80; // GPUSTAT bit 14 -> cmd bit 7
-
- GPU_GP1 = 0x08000000 | mode;
-}
-
-int GetODE(void) {
- return (GPU_GP1 >> 31);
-}
+ for (; timeout; timeout--) {
+ if (GPU_GP1 & (1 << 26))
+ return 0;
+ }
-void SetDispMask(int mask) {
- GPU_GP1 = 0x03000000 | (mask ? 0 : 1);
+ //_sdk_log("IsIdleGPU() timeout\n");
+ return -1;
}
diff --git a/libpsn00b/psxgpu/drawing.c b/libpsn00b/psxgpu/drawing.c
new file mode 100644
index 0000000..161b2f7
--- /dev/null
+++ b/libpsn00b/psxgpu/drawing.c
@@ -0,0 +1,148 @@
+/*
+ * PSn00bSDK GPU library (drawing/display list functions)
+ * (C) 2022-2023 spicyjpeg - MPL licensed
+ */
+
+#include <stdint.h>
+#include <assert.h>
+#include <psxetc.h>
+#include <psxgpu.h>
+#include <hwregs_c.h>
+
+/* Private utilities */
+
+// This function is actually referenced in env.c as well, so it can't be static.
+void _send_linked_list(GPU_DrawOpType type, const uint32_t *ot) {
+ SetDrawOpType(type);
+ GPU_GP1 = 0x04000002; // Enable DMA request, route to GP0
+
+ while (DMA_CHCR(DMA_GPU) & (1 << 24))
+ __asm__ volatile("");
+
+ DMA_MADR(DMA_GPU) = (uint32_t) ot;
+ DMA_BCR(DMA_GPU) = 0;
+ DMA_CHCR(DMA_GPU) = 0x01000401;
+}
+
+static void _send_buffer(
+ GPU_DrawOpType type, const uint32_t *buf, size_t length
+) {
+ SetDrawOpType(type);
+ GPU_GP1 = 0x04000002; // Enable DMA request, route to GP0
+
+ while (DMA_CHCR(DMA_GPU) & (1 << 24))
+ __asm__ volatile("");
+
+ DMA_MADR(DMA_GPU) = (uint32_t) buf;
+ DMA_BCR(DMA_GPU) = 0x00000001 | (length << 16);
+ DMA_CHCR(DMA_GPU) = 0x01000201;
+}
+
+/* Buffer and primitive drawing API */
+
+int DrawOTag(const uint32_t *ot) {
+ _sdk_validate_args(ot, -1);
+
+ return EnqueueDrawOp(
+ (void *) &_send_linked_list,
+ (uint32_t) DRAWOP_TYPE_DMA,
+ (uint32_t) ot,
+ 0
+ );
+}
+
+int DrawOTagIRQ(const uint32_t *ot) {
+ _sdk_validate_args(ot, -1);
+
+ return EnqueueDrawOp(
+ (void *) &_send_linked_list,
+ (uint32_t) DRAWOP_TYPE_GPU_IRQ,
+ (uint32_t) ot,
+ 0
+ );
+}
+
+int DrawBuffer(const uint32_t *buf, size_t length) {
+ _sdk_validate_args(buf && length && (length <= 0xffff), -1);
+
+ return EnqueueDrawOp(
+ (void *) &DrawBuffer2,
+ (uint32_t) DRAWOP_TYPE_DMA,
+ (uint32_t) buf,
+ (uint32_t) length
+ );
+}
+
+int DrawBufferIRQ(const uint32_t *buf, size_t length) {
+ _sdk_validate_args(buf && length && (length <= 0xffff), -1);
+
+ return EnqueueDrawOp(
+ (void *) &DrawBuffer2,
+ (uint32_t) DRAWOP_TYPE_GPU_IRQ,
+ (uint32_t) buf,
+ (uint32_t) length
+ );
+}
+
+void DrawOTag2(const uint32_t *ot) {
+ _sdk_validate_args_void(ot);
+
+ _send_linked_list(DRAWOP_TYPE_DMA, ot);
+}
+
+void DrawOTagIRQ2(const uint32_t *ot) {
+ _sdk_validate_args_void(ot);
+
+ _send_linked_list(DRAWOP_TYPE_GPU_IRQ, ot);
+}
+
+void DrawBuffer2(const uint32_t *buf, size_t length) {
+ _sdk_validate_args_void(buf && length && (length <= 0xffff));
+
+ _send_buffer(DRAWOP_TYPE_DMA, buf, length);
+}
+
+void DrawBufferIRQ2(const uint32_t *buf, size_t length) {
+ _sdk_validate_args_void(buf && length && (length <= 0xffff));
+
+ _send_buffer(DRAWOP_TYPE_GPU_IRQ, buf, length);
+}
+
+void DrawPrim(const uint32_t *pri) {
+ _sdk_validate_args_void(pri);
+
+ DrawSync(0);
+ DrawBuffer2(&pri[1], getlen(pri));
+}
+
+/* Helper functions */
+
+void ClearOTagR(uint32_t *ot, size_t length) {
+ _sdk_validate_args_void(ot && length);
+
+ DMA_MADR(DMA_OTC) = (uint32_t) &ot[length - 1];
+ DMA_BCR(DMA_OTC) = length & 0xffff;
+ DMA_CHCR(DMA_OTC) = 0x11000002;
+
+ while (DMA_CHCR(DMA_OTC) & (1 << 24))
+ __asm__ volatile("");
+}
+
+void ClearOTag(uint32_t *ot, size_t length) {
+ _sdk_validate_args_void(ot && length);
+
+ // DMA6 only supports writing to RAM in reverse order (last to first), so
+ // the OT has to be cleared in software here. This function is thus much
+ // slower than ClearOTagR().
+ // https://problemkaputt.de/psx-spx.htm#dmachannels
+ for (int i = 0; i < (length - 1); i++)
+ ot[i] = (uint32_t) &ot[i + 1] & 0x7fffff;
+
+ ot[length - 1] = 0xffffff;
+}
+
+void AddPrim(uint32_t *ot, const void *pri) {
+ _sdk_validate_args_void(ot && pri);
+
+ addPrim(ot, pri);
+}
diff --git a/libpsn00b/psxgpu/env.c b/libpsn00b/psxgpu/env.c
index 8784947..236ae4b 100644
--- a/libpsn00b/psxgpu/env.c
+++ b/libpsn00b/psxgpu/env.c
@@ -1,9 +1,10 @@
/*
* PSn00bSDK GPU library (DRAWENV/DISPENV functions)
- * (C) 2022 spicyjpeg - MPL licensed
+ * (C) 2022-2023 spicyjpeg - MPL licensed
*/
#include <stdint.h>
+#include <assert.h>
#include <psxgpu.h>
#include <hwregs_c.h>
@@ -33,9 +34,53 @@ static inline uint32_t _get_window_mask(int size) {
return mask & 0x1f;
}
+static const uint32_t *_build_drawenv_ot(const uint32_t *ot, DRAWENV *env) {
+ // All commands are grouped into a single display list packet for
+ // performance reasons using tagless primitives (the GPU does not care
+ // about the grouping as the display list is parsed by the CPU).
+ DR_ENV *prim = &(env->dr_env);
+ setaddr(prim, ot);
+ setlen(prim, 5);
+
+ // Texture page (reset active page and set dither/mask bits)
+ setDrawTPage_T(&(prim->tpage), env->dfe & 1, env->dtd & 1, env->tpage);
+
+ // Texture window
+ //setTexWindow_T(&(prim->twin), &(env->tw));
+ prim->twin.code[0] = 0xe2000000;
+ prim->twin.code[0] |= _get_window_mask(env->tw.w);
+ prim->twin.code[0] |= _get_window_mask(env->tw.h) << 5;
+ prim->twin.code[0] |= (env->tw.x & 0xf8) << 7; // ((tw.x / 8) & 0x1f) << 10
+ prim->twin.code[0] |= (env->tw.y & 0xf8) << 12; // ((tw.y / 8) & 0x1f) << 15
+
+ // Set drawing area
+ setDrawArea_T(&(prim->area), &(env->clip));
+ setDrawOffset_T(
+ &(prim->offset), env->clip.x + env->ofs[0], env->clip.y + env->ofs[1]
+ );
+
+ if (env->isbg) {
+ FILL_T *fill = &(prim->fill);
+ setlen(prim, 8);
+
+ // Rectangle fill
+ // FIXME: reportedly this command doesn't accept height values >511...
+ setFill_T(fill);
+ setColor0(fill, *((const uint32_t *) &(env->isbg)) >> 8);
+ setXY0(fill, env->clip.x, env->clip.y);
+ setWH(fill, env->clip.w, _min(env->clip.h, 0x1ff));
+ }
+
+ return (const uint32_t *) prim;
+}
+
/* Drawing API */
+void _send_linked_list(GPU_DrawOpType type, const uint32_t *ot);
+
DRAWENV *SetDefDrawEnv(DRAWENV *env, int x, int y, int w, int h) {
+ _sdk_validate_args(env && (w > 0) && (h > 0), 0);
+
env->clip.x = x;
env->clip.y = y;
env->clip.w = w;
@@ -60,69 +105,41 @@ DRAWENV *SetDefDrawEnv(DRAWENV *env, int x, int y, int w, int h) {
}
int DrawOTagEnv(const uint32_t *ot, DRAWENV *env) {
- DR_ENV *prim = &(env->dr_env);
-
- // All commands are grouped into a single display list packet for
- // performance reasons (the GPU does not care about the grouping as the
- // display list is parsed by the DMA unit in the CPU).
- setaddr(prim, ot);
- setlen(prim, 5);
-
- // Texture page (reset active page and set dither/mask bits)
- prim->code[0] = 0xe1000000 | env->tpage;
- prim->code[0] |= (env->dtd & 1) << 9;
- prim->code[0] |= (env->dfe & 1) << 10;
+ _sdk_validate_args(ot && env, -1);
+
+ return EnqueueDrawOp(
+ (void *) &_send_linked_list,
+ (uint32_t) DRAWOP_TYPE_DMA,
+ (uint32_t) _build_drawenv_ot(ot, env),
+ 0
+ );
+}
- // Texture window
- prim->code[1] = 0xe2000000;
- prim->code[1] |= _get_window_mask(env->tw.w);
- prim->code[1] |= _get_window_mask(env->tw.h) << 5;
- prim->code[1] |= (env->tw.x & 0xf8) << 7; // ((tw.x / 8) & 0x1f) << 10
- prim->code[1] |= (env->tw.y & 0xf8) << 12; // ((tw.y / 8) & 0x1f) << 15
-
- // Set drawing area top left
- prim->code[2] = 0xe3000000;
- prim->code[2] |= env->clip.x & 0x3ff;
- prim->code[2] |= (env->clip.y & 0x3ff) << 10;
-
- // Set drawing area bottom right
- prim->code[3] = 0xe4000000;
- prim->code[3] |= (env->clip.x + (env->clip.w - 1)) & 0x3ff;
- prim->code[3] |= ((env->clip.y + (env->clip.h - 1)) & 0x3ff) << 10;
-
- // Set drawing offset
- prim->code[4] = 0xe5000000;
- prim->code[4] |= (env->clip.x + env->ofs[0]) & 0x7ff;
- prim->code[4] |= ((env->clip.y + env->ofs[1]) & 0x7ff) << 11;
+int DrawOTagEnvIRQ(const uint32_t *ot, DRAWENV *env) {
+ _sdk_validate_args(ot && env, -1);
- if (env->isbg) {
- setlen(prim, 8);
-
- // Rectangle fill
- // FIXME: reportedly this command doesn't accept height values >511...
- prim->code[5] = 0x02000000;
- //prim->code[5] |= env->r0 | (env->g0 << 8) | (env->b0 << 16);
- //prim->code[6] = env->clip.x;
- //prim->code[6] |= env->clip.y << 16;
- prim->code[5] |= *((const uint32_t *) &(env->isbg)) >> 8;
- prim->code[6] = *((const uint32_t *) &(env->clip.x));
- prim->code[7] = env->clip.w;
- prim->code[7] |= _min(env->clip.h, 0x1ff) << 16;
- }
-
- return EnqueueDrawOp((void *) &DrawOTag2, (uint32_t) prim, 0, 0);
+ return EnqueueDrawOp(
+ (void *) &_send_linked_list,
+ (uint32_t) DRAWOP_TYPE_GPU_IRQ,
+ (uint32_t) _build_drawenv_ot(ot, env),
+ 0
+ );
}
void PutDrawEnv(DRAWENV *env) {
- DrawOTagEnv((const uint32_t *) 0x00ffffff, env);
+ _sdk_validate_args_void(env);
+
+ DrawOTagEnv((const uint32_t *) 0xffffff, env);
}
// This function skips rebuilding the cached packet whenever possible and is
// useful if the DRAWENV structure is never modified (which is the case most of
// the time).
void PutDrawEnvFast(DRAWENV *env) {
+ _sdk_validate_args_void(env);
+
if (!(env->dr_env.tag))
- DrawOTagEnv((const uint32_t *) 0x00ffffff, env);
+ DrawOTagEnv((const uint32_t *) 0xffffff, env);
else
DrawOTag((const uint32_t *) &(env->dr_env));
}
@@ -130,6 +147,8 @@ void PutDrawEnvFast(DRAWENV *env) {
/* Display API */
DISPENV *SetDefDispEnv(DISPENV *env, int x, int y, int w, int h) {
+ _sdk_validate_args(env && (w > 0) && (h > 0), 0);
+
env->disp.x = x;
env->disp.y = y;
env->disp.w = w;
@@ -148,6 +167,8 @@ DISPENV *SetDefDispEnv(DISPENV *env, int x, int y, int w, int h) {
}
void PutDispEnv(const DISPENV *env) {
+ _sdk_validate_args_void(env);
+
uint32_t h_range, v_range, mode, fb_pos;
mode = _gpu_video_mode << 3;
@@ -216,6 +237,8 @@ void PutDispEnv(const DISPENV *env) {
/* Deprecated "raw" display API */
void PutDispEnvRaw(const DISPENV_RAW *env) {
+ _sdk_validate_args_void(env);
+
uint32_t h_range, v_range, fb_pos;
h_range = 608 + env->vid_xpos;
@@ -233,3 +256,30 @@ void PutDispEnvRaw(const DISPENV_RAW *env) {
GPU_GP1 = 0x08000000 | env->vid_mode; // Set video mode
GPU_GP1 = 0x05000000 | fb_pos; // Set VRAM location to display
}
+
+/* Misc. display functions */
+
+GPU_VideoMode GetVideoMode(void) {
+ return _gpu_video_mode;
+}
+
+void SetVideoMode(GPU_VideoMode mode) {
+ uint32_t _mode, stat = GPU_GP1;
+
+ _gpu_video_mode = mode & 1;
+
+ _mode = (mode & 1) << 3;
+ _mode |= (stat >> 17) & 0x37; // GPUSTAT bits 17-22 -> command bits 0-5
+ _mode |= (stat >> 10) & 0x40; // GPUSTAT bit 16 -> command bit 6
+ _mode |= (stat >> 7) & 0x80; // GPUSTAT bit 14 -> command bit 7
+
+ GPU_GP1 = 0x08000000 | _mode;
+}
+
+int GetODE(void) {
+ return (GPU_GP1 >> 31);
+}
+
+void SetDispMask(int mask) {
+ GPU_GP1 = 0x03000000 | (mask ? 0 : 1);
+}
diff --git a/libpsn00b/psxgpu/font.c b/libpsn00b/psxgpu/font.c
index b1c3c7a..c9d60f1 100644
--- a/libpsn00b/psxgpu/font.c
+++ b/libpsn00b/psxgpu/font.c
@@ -1,4 +1,5 @@
#include <stdint.h>
+#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -23,6 +24,7 @@ uint16_t _font_clut;
extern uint8_t _gpu_debug_font[];
void FntLoad(int x, int y) {
+ _sdk_validate_args_void((x >= 0) && (y >= 0) && (x < 1024) && (y < 1024));
RECT pos;
TIM_IMAGE tim;
@@ -66,7 +68,8 @@ void FntLoad(int x, int y) {
}
int FntOpen(int x, int y, int w, int h, int isbg, int n) {
-
+ _sdk_validate_args((w > 0) && (h > 0) && (n > 0), -1);
+
int i;
// Initialize a text stream
@@ -98,7 +101,8 @@ int FntOpen(int x, int y, int w, int h, int isbg, int n) {
}
int FntPrint(int id, const char *fmt, ...) {
-
+ _sdk_validate_args((id < _nstreams) && fmt, -1);
+
int n;
va_list ap;
@@ -124,7 +128,8 @@ int FntPrint(int id, const char *fmt, ...) {
}
char *FntFlush(int id) {
-
+ _sdk_validate_args(id < _nstreams, 0);
+
char *opri;
SPRT_8 *sprt;
DR_TPAGE *tpage;
@@ -226,7 +231,8 @@ char *FntFlush(int id) {
}
char *FntSort(uint32_t *ot, char *pri, int x, int y, const char *text) {
-
+ _sdk_validate_args(ot && pri, 0);
+
DR_TPAGE *tpage;
SPRT_8 *sprt = (SPRT_8*)pri;
int i;
diff --git a/libpsn00b/psxgpu/image.c b/libpsn00b/psxgpu/image.c
index fc018a4..e02c3c2 100644
--- a/libpsn00b/psxgpu/image.c
+++ b/libpsn00b/psxgpu/image.c
@@ -1,6 +1,6 @@
/*
* PSn00bSDK GPU library (image and VRAM transfer functions)
- * (C) 2022 spicyjpeg - MPL licensed
+ * (C) 2022-2023 spicyjpeg - MPL licensed
*
* TODO: MoveImage() is currently commented out as it won't trigger a DMA IRQ,
* making it unusable as a draw queue command. A way around this (perhaps using
@@ -9,11 +9,12 @@
#include <stdint.h>
#include <assert.h>
+#include <psxetc.h>
#include <psxgpu.h>
#include <hwregs_c.h>
#define QUEUE_LENGTH 16
-#define DMA_CHUNK_LENGTH 8
+#define DMA_CHUNK_LENGTH 16
/* Internal globals */
@@ -37,6 +38,10 @@ static void _dma_transfer(const RECT *rect, uint32_t *data, int write) {
length += DMA_CHUNK_LENGTH - 1;
}
+ while (!(GPU_GP1 & (1 << 26)))
+ __asm__ volatile("");
+
+ SetDrawOpType(DRAWOP_TYPE_DMA);
GPU_GP1 = 0x04000000; // Disable DMA request
GPU_GP0 = 0x01000000; // Flush cache
@@ -49,18 +54,24 @@ static void _dma_transfer(const RECT *rect, uint32_t *data, int write) {
// Enable DMA request, route to GP0 (2) or from GPU_READ (3)
GPU_GP1 = 0x04000002 | (write ^ 1);
- DMA_MADR(2) = (uint32_t) data;
+ while ((DMA_CHCR(DMA_GPU) & (1 << 24)) || !(GPU_GP1 & (1 << 28)))
+ __asm__ volatile("");
+
+ DMA_MADR(DMA_GPU) = (uint32_t) data;
if (length < DMA_CHUNK_LENGTH)
- DMA_BCR(2) = 0x00010000 | length;
+ DMA_BCR(DMA_GPU) = 0x00010000 | length;
else
- DMA_BCR(2) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16);
+ DMA_BCR(DMA_GPU) = DMA_CHUNK_LENGTH |
+ ((length / DMA_CHUNK_LENGTH) << 16);
- DMA_CHCR(2) = 0x01000200 | write;
+ DMA_CHCR(DMA_GPU) = 0x01000200 | write;
}
/* VRAM transfer API */
int LoadImage(const RECT *rect, const uint32_t *data) {
+ _sdk_validate_args(rect && data, -1);
+
int index = _next_saved_rect;
_saved_rects[index] = *rect;
@@ -75,6 +86,8 @@ int LoadImage(const RECT *rect, const uint32_t *data) {
}
int StoreImage(const RECT *rect, uint32_t *data) {
+ _sdk_validate_args(rect && data, -1);
+
int index = _next_saved_rect;
_saved_rects[index] = *rect;
@@ -88,22 +101,53 @@ int StoreImage(const RECT *rect, uint32_t *data) {
);
}
+int MoveImage(const RECT *rect, int x, int y) {
+ _sdk_validate_args(rect, -1);
+
+ int index = _next_saved_rect;
+
+ _saved_rects[index] = *rect;
+ _next_saved_rect = (index + 1) % QUEUE_LENGTH;
+
+ return EnqueueDrawOp(
+ (void *) &MoveImage2,
+ (uint32_t) &_saved_rects[index],
+ (uint32_t) x,
+ (uint32_t) y
+ );
+}
+
void LoadImage2(const RECT *rect, const uint32_t *data) {
+ _sdk_validate_args_void(rect && data);
+
_dma_transfer(rect, (uint32_t *) data, 1);
}
void StoreImage2(const RECT *rect, uint32_t *data) {
+ _sdk_validate_args_void(rect && data);
+
_dma_transfer(rect, data, 0);
}
-/*void MoveImage2(const RECT *rect, int x, int y) {
+void MoveImage2(const RECT *rect, int x, int y) {
+ _sdk_validate_args_void(rect);
+
+ while (!(GPU_GP1 & (1 << 26)))
+ __asm__ volatile("");
+
+ SetDrawOpType(DRAWOP_TYPE_GPU_IRQ);
+
GPU_GP0 = 0x80000000;
//GPU_GP0 = rect->x | (rect->y << 16);
GPU_GP0 = *((const uint32_t *) &(rect->x));
GPU_GP0 = (x & 0xffff) | (y << 16);
//GPU_GP0 = rect->w | (rect->h << 16);
GPU_GP0 = *((const uint32_t *) &(rect->w));
-}*/
+
+ // As no DMA transfer is performed by this command, the GPU IRQ is used
+ // instead of the DMA IRQ to trigger the draw queue callback.
+ GPU_GP0 = 0x1f000000;
+}
/* .TIM image parsers */
@@ -112,6 +156,8 @@ void StoreImage2(const RECT *rect, uint32_t *data) {
// returning pointers to them, which become useless once the .TIM file is
// unloaded from main RAM.
int GsGetTimInfo(const uint32_t *tim, GsIMAGE *info) {
+ _sdk_validate_args(tim && info, 1);
+
if ((*(tim++) & 0xffff) != 0x0010)
return 1;
@@ -138,6 +184,8 @@ int GsGetTimInfo(const uint32_t *tim, GsIMAGE *info) {
}
int GetTimInfo(const uint32_t *tim, TIM_IMAGE *info) {
+ _sdk_validate_args(tim && info, 1);
+
if ((*(tim++) & 0xffff) != 0x0010)
return 1;
diff --git a/libpsn00b/psxpress/README.md b/libpsn00b/psxpress/README.md
index a894874..df18ec5 100644
--- a/libpsn00b/psxpress/README.md
+++ b/libpsn00b/psxpress/README.md
@@ -1,14 +1,19 @@
# PSn00bSDK MDEC library
-This is a fully open source reimplementation of the official SDK's "data
+This is a fully original reimplementation of the official SDK's "data
compression" library. This library is made up of two parts, the MDEC API and
functions to decompress Huffman-encoded bitstreams (.BS files, or frames in
-.STR files) into data to be fed to the MDEC. FMV playback is not part of this
-library (nor the official one) per se, but can implemented by using these APIs
-alongside some code to stream data from the CD drive.
+.STR files) into data to be fed to the MDEC. Two different implementations of
+the latter are provided, one using the GTE and scratchpad region and an older
+one using a large lookup table in main RAM.
-**Currently only version 1 and 2 bitstreams are supported**.
+FMV playback is not part of this library per se, but can be implemented using
+the APIs defined here alongside some code to stream data from the CD drive.
+
+Currently bitstream versions 1, 2 and 3 are supported. Version 0 and .IKI
+bitstreams are not supported, but no encoder is publicly available for those
+anyway.
## MDEC API
@@ -26,14 +31,16 @@ The following functions are currently provided:
- `DecDCTvlcStart()`, `DecDCTvlcContinue()`: a decompressor implementation that
uses a small (<1 KB) lookup table and leverages the GTE, written in assembly.
- `DecDCTvlcCopyTable()` can optionally be called to temporarily move the table
- to the scratchpad region to improve decompression speed.
-- `DecDCTvlcStart2()`, `DecDCTvlcContinue2()`: a different implementation using
+ `DecDCTvlcCopyTableV2()` or `DecDCTvlcCopyTableV3()` may optionally be called
+ to temporarily move the table to the scratchpad region in order to boost
+ decompression speed.
+- `DecDCTvlcStart2()`, `DecDCTvlcContinue2()`: an older implementation using
a large (34 KB) lookup table in main RAM, written in C. The table must be
- decompressed ahead of time using `DecDCTvlcBuild()`, but can be deallocated
- when no longer needed.
+ decompressed ahead of time manually using `DecDCTvlcBuild()`, but can be
+ deallocated when no longer needed. **This implementation does not support**
+ **version 3 bitstreams**.
- `DecDCTvlc()`, `DecDCTvlc2()`: wrappers around the functions listed above,
- for compatibility with the Sony SDK. Using them is not recommended.
+ for compatibility with the Sony SDK.
## SPU ADPCM encoding API
diff --git a/libpsn00b/psxpress/mdec.c b/libpsn00b/psxpress/mdec.c
index d15a4db..b3aa837 100644
--- a/libpsn00b/psxpress/mdec.c
+++ b/libpsn00b/psxpress/mdec.c
@@ -1,11 +1,11 @@
/*
* PSn00bSDK MDEC library (low-level MDEC/DMA API)
- * (C) 2022 spicyjpeg - MPL licensed
+ * (C) 2022-2023 spicyjpeg - MPL licensed
*/
#include <stdint.h>
#include <assert.h>
-#include <psxapi.h>
+#include <psxetc.h>
#include <psxpress.h>
#include <hwregs_c.h>
@@ -14,14 +14,14 @@
/* Default IDCT matrix and quantization tables */
-#define S0 0x5a82 // 0x4000 * cos(0/16 * pi) * sqrt(2)
-#define S1 0x7d8a // 0x4000 * cos(1/16 * pi) * 2
-#define S2 0x7641 // 0x4000 * cos(2/16 * pi) * 2
-#define S3 0x6a6d // 0x4000 * cos(3/16 * pi) * 2
-#define S4 0x5a82 // 0x4000 * cos(4/16 * pi) * 2
-#define S5 0x471c // 0x4000 * cos(5/16 * pi) * 2
-#define S6 0x30fb // 0x4000 * cos(6/16 * pi) * 2
-#define S7 0x18f8 // 0x4000 * cos(7/16 * pi) * 2
+#define S0 0x5a82 // (1 << 14) * cos(0/16 * pi) * sqrt(2)
+#define S1 0x7d8a // (1 << 14) * cos(1/16 * pi) * 2
+#define S2 0x7641 // (1 << 14) * cos(2/16 * pi) * 2
+#define S3 0x6a6d // (1 << 14) * cos(3/16 * pi) * 2
+#define S4 0x5a82 // (1 << 14) * cos(4/16 * pi) * 2
+#define S5 0x471c // (1 << 14) * cos(5/16 * pi) * 2
+#define S6 0x30fb // (1 << 14) * cos(6/16 * pi) * 2
+#define S7 0x18f8 // (1 << 14) * cos(7/16 * pi) * 2
static const DECDCTENV _default_mdec_env = {
// The default luma and chroma quantization table is based on the MPEG-1
@@ -84,34 +84,38 @@ static const DECDCTENV _default_mdec_env = {
/* Public API */
void DecDCTReset(int mode) {
- FastEnterCriticalSection();
+ SetDMAPriority(DMA_MDEC_IN, 3);
+ SetDMAPriority(DMA_MDEC_OUT, 3);
+ DMA_CHCR(DMA_MDEC_IN) = 0x00000201; // Stop DMA
+ DMA_CHCR(DMA_MDEC_OUT) = 0x00000200; // Stop DMA
- DMA_DPCR |= 0x000000bb; // Enable DMA0 and DMA1
- DMA_CHCR(0) = 0x00000201; // Stop DMA0
- DMA_CHCR(1) = 0x00000200; // Stop DMA1
- MDEC1 = 0x80000000; // Reset MDEC
- MDEC1 = 0x60000000; // Enable DMA in/out requests
+ MDEC1 = 0x80000000; // Reset MDEC
+ MDEC1 = 0x60000000; // Enable DMA in/out requests
- FastExitCriticalSection();
if (!mode)
DecDCTPutEnv(0, 0);
}
void DecDCTPutEnv(const DECDCTENV *env, int mono) {
- const DECDCTENV *_env = env ? env : &_default_mdec_env;
DecDCTinSync(0);
+ if (!env)
+ env = &_default_mdec_env;
MDEC0 = 0x60000000; // Set IDCT matrix
- DecDCTinRaw((const uint32_t *) _env->dct, 32);
+ DecDCTinRaw((const uint32_t *) env->dct, 32);
DecDCTinSync(0);
- MDEC0 = 0x40000000 | (mono ? 0 : 1); // Set table(s)
- DecDCTinRaw((const uint32_t *) _env->iq_y, mono ? 16 : 32);
+ MDEC0 = 0x40000000 | (mono ? 0 : 1); // Set quantization table(s)
+ DecDCTinRaw((const uint32_t *) env->iq_y, mono ? 16 : 32);
DecDCTinSync(0);
}
void DecDCTin(const uint32_t *data, int mode) {
+ _sdk_validate_args_void(data);
+
uint32_t header = *data;
+ DecDCTinSync(0);
+
if (mode == DECDCT_MODE_RAW)
MDEC0 = header;
else if (mode & DECDCT_MODE_24BPP)
@@ -126,18 +130,21 @@ void DecDCTin(const uint32_t *data, int mode) {
// data length as an argument rather than parsing it from the first 4 bytes of
// the stream.
void DecDCTinRaw(const uint32_t *data, size_t length) {
+ _sdk_validate_args_void(data && length);
+
if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) {
_sdk_log("input data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH);
length += DMA_CHUNK_LENGTH - 1;
}
- DMA_MADR(0) = (uint32_t) data;
+ DMA_MADR(DMA_MDEC_IN) = (uint32_t) data;
if (length < DMA_CHUNK_LENGTH)
- DMA_BCR(0) = 0x00010000 | length;
+ DMA_BCR(DMA_MDEC_IN) = 0x00010000 | length;
else
- DMA_BCR(0) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16);
+ DMA_BCR(DMA_MDEC_IN) = DMA_CHUNK_LENGTH |
+ ((length / DMA_CHUNK_LENGTH) << 16);
- DMA_CHCR(0) = 0x01000201;
+ DMA_CHCR(DMA_MDEC_IN) = 0x01000201;
}
int DecDCTinSync(int mode) {
@@ -149,11 +156,13 @@ int DecDCTinSync(int mode) {
return 0;
}
- _sdk_log("DecDCTinSync() timeout\n");
+ _sdk_log("DecDCTinSync() timeout, MDEC1=0x%08x\n", MDEC1);
return -1;
}
void DecDCTout(uint32_t *data, size_t length) {
+ _sdk_validate_args_void(data && length);
+
DecDCToutSync(0);
if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) {
@@ -161,24 +170,25 @@ void DecDCTout(uint32_t *data, size_t length) {
length += DMA_CHUNK_LENGTH - 1;
}
- DMA_MADR(1) = (uint32_t) data;
+ DMA_MADR(DMA_MDEC_OUT) = (uint32_t) data;
if (length < DMA_CHUNK_LENGTH)
- DMA_BCR(1) = 0x00010000 | length;
+ DMA_BCR(DMA_MDEC_OUT) = 0x00010000 | length;
else
- DMA_BCR(1) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16);
+ DMA_BCR(DMA_MDEC_OUT) = DMA_CHUNK_LENGTH |
+ ((length / DMA_CHUNK_LENGTH) << 16);
- DMA_CHCR(1) = 0x01000200;
+ DMA_CHCR(DMA_MDEC_OUT) = 0x01000200;
}
int DecDCToutSync(int mode) {
if (mode)
- return (DMA_CHCR(1) >> 24) & 1;
+ return (DMA_CHCR(DMA_MDEC_OUT) >> 24) & 1;
for (int i = MDEC_SYNC_TIMEOUT; i; i--) {
- if (!(DMA_CHCR(1) & (1 << 24)))
+ if (!(DMA_CHCR(DMA_MDEC_OUT) & (1 << 24)))
return 0;
}
- _sdk_log("DecDCToutSync() timeout\n");
+ _sdk_log("DecDCToutSync() timeout, CHCR=0x%08x\n", DMA_CHCR(DMA_MDEC_OUT));
return -1;
}
diff --git a/libpsn00b/psxpress/vlc.c b/libpsn00b/psxpress/vlc.c
index 4e3e283..36cfbe2 100644
--- a/libpsn00b/psxpress/vlc.c
+++ b/libpsn00b/psxpress/vlc.c
@@ -1,6 +1,6 @@
/*
* PSn00bSDK MDEC library (support code for the main VLC decompressor)
- * (C) 2022 spicyjpeg - MPL licensed
+ * (C) 2022-2023 spicyjpeg - MPL licensed
*/
#include <stdint.h>
@@ -10,87 +10,120 @@
/* Huffman code lookup table */
-#define _val1(rl, dc) (((rl) << 10) | ((uint16_t) (dc) & 0x3ff))
-#define _val2(rl, dc, len) (_val1(rl, dc) | (len << 16))
+#define _DC(y, c) (((y) << 4) | (c))
+#define _AC(rl, dc) (((rl) << 10) | ((uint16_t) (dc) & 0x3ff))
+#define _ACL(rl, dc, len) (_AC(rl, dc) | ((len) << 16))
-#define _pair(rl, dc) _val1(rl, dc), _val1(rl, -(dc))
-#define _pair2(rl, dc, len) _val2(rl, dc, len), _val2(rl, -(dc), len)
-#define _pair3(rl, dc, len) \
- _val2(rl, dc, len), _val2(rl, dc, len), \
- _val2(rl, -(dc), len), _val2(rl, -(dc), len)
-#define _pair4(rl, dc, len) \
- _val2(rl, dc, len), _val2(rl, dc, len), _val2(rl, dc, len), _val2(rl, dc, len), \
- _val2(rl, dc, len), _val2(rl, dc, len), _val2(rl, dc, len), _val2(rl, dc, len), \
- _val2(rl, -(dc), len), _val2(rl, -(dc), len), _val2(rl, -(dc), len), _val2(rl, -(dc), len), \
- _val2(rl, -(dc), len), _val2(rl, -(dc), len), _val2(rl, -(dc), len), _val2(rl, -(dc), len)
+#define _DC2(y, c) _DC(y, c), _DC(y, c)
+#define _DC3(y, c) _DC(y, c), _DC(y, c), _DC(y, c), _DC(y, c)
+#define _DC4(y, c) \
+ _DC(y, c), _DC(y, c), _DC(y, c), _DC(y, c), \
+ _DC(y, c), _DC(y, c), _DC(y, c), _DC(y, c)
+#define _AC2(rl, dc) _AC(rl, dc), _AC(rl, -(dc))
+#define _ACL2(rl, dc, len) _ACL(rl, dc, len), _ACL(rl, -(dc), len)
+#define _ACL3(rl, dc, len) \
+ _ACL(rl, dc, len), _ACL(rl, dc, len), \
+ _ACL(rl, -(dc), len), _ACL(rl, -(dc), len)
+#define _ACL4(rl, dc, len) \
+ _ACL(rl, dc, len), _ACL(rl, dc, len), _ACL(rl, dc, len), _ACL(rl, dc, len), \
+ _ACL(rl, dc, len), _ACL(rl, dc, len), _ACL(rl, dc, len), _ACL(rl, dc, len), \
+ _ACL(rl, -(dc), len), _ACL(rl, -(dc), len), _ACL(rl, -(dc), len), _ACL(rl, -(dc), len), \
+ _ACL(rl, -(dc), len), _ACL(rl, -(dc), len), _ACL(rl, -(dc), len), _ACL(rl, -(dc), len)
// This table isn't compressed since it makes no sense to compress less than a
// kilobyte's worth of data.
-static const DECDCTTAB _default_huffman_table = {
- .lut0 = {
+static const VLC_TableV3 _default_huffman_table = {
+ .ac0 = {
// 11 x
- _pair( 0, 1)
+ _AC2( 0, 1)
},
- .lut2 = {
+ .ac2 = {
// 01 0xx
- _pair2( 0, 2, 5), _pair2( 2, 1, 5),
+ _ACL2( 0, 2, 5), _ACL2( 2, 1, 5),
// 01 1x-
- _pair3( 1, 1, 4)
+ _ACL3( 1, 1, 4)
},
- .lut3 = {
+ .ac3 = {
// 001 00xxxx
- _pair2(13, 1, 9), _pair2( 0, 6, 9), _pair2(12, 1, 9), _pair2(11, 1, 9),
- _pair2( 3, 2, 9), _pair2( 1, 3, 9), _pair2( 0, 5, 9), _pair2(10, 1, 9),
+ _ACL2(13, 1, 9), _ACL2( 0, 6, 9), _ACL2(12, 1, 9), _ACL2(11, 1, 9),
+ _ACL2( 3, 2, 9), _ACL2( 1, 3, 9), _ACL2( 0, 5, 9), _ACL2(10, 1, 9),
// 001 xxx---
- _pair4( 0, 3, 6), _pair4( 4, 1, 6), _pair4( 3, 1, 6)
+ _ACL4( 0, 3, 6), _ACL4( 4, 1, 6), _ACL4( 3, 1, 6)
},
- .lut4 = {
+ .ac4 = {
// 0001 xxx
- _pair( 7, 1), _pair( 6, 1), _pair( 1, 2), _pair( 5, 1)
+ _AC2( 7, 1), _AC2( 6, 1), _AC2( 1, 2), _AC2( 5, 1)
},
- .lut5 = {
+ .ac5 = {
// 00001 xxx
- _pair( 2, 2), _pair( 9, 1), _pair( 0, 4), _pair( 8, 1)
+ _AC2( 2, 2), _AC2( 9, 1), _AC2( 0, 4), _AC2( 8, 1)
},
- .lut7 = {
+ .ac7 = {
// 0000001 xxxx
- _pair(16, 1), _pair( 5, 2), _pair( 0, 7), _pair( 2, 3),
- _pair( 1, 4), _pair(15, 1), _pair(14, 1), _pair( 4, 2)
+ _AC2(16, 1), _AC2( 5, 2), _AC2( 0, 7), _AC2( 2, 3),
+ _AC2( 1, 4), _AC2(15, 1), _AC2(14, 1), _AC2( 4, 2)
},
- .lut8 = {
+ .ac8 = {
// 00000001 xxxxx
- _pair( 0, 11), _pair( 8, 2), _pair( 4, 3), _pair( 0, 10),
- _pair( 2, 4), _pair( 7, 2), _pair(21, 1), _pair(20, 1),
- _pair( 0, 9), _pair(19, 1), _pair(18, 1), _pair( 1, 5),
- _pair( 3, 3), _pair( 0, 8), _pair( 6, 2), _pair(17, 1)
+ _AC2( 0, 11), _AC2( 8, 2), _AC2( 4, 3), _AC2( 0, 10),
+ _AC2( 2, 4), _AC2( 7, 2), _AC2(21, 1), _AC2(20, 1),
+ _AC2( 0, 9), _AC2(19, 1), _AC2(18, 1), _AC2( 1, 5),
+ _AC2( 3, 3), _AC2( 0, 8), _AC2( 6, 2), _AC2(17, 1)
},
- .lut9 = {
+ .ac9 = {
// 000000001 xxxxx
- _pair(10, 2), _pair( 9, 2), _pair( 5, 3), _pair( 3, 4),
- _pair( 2, 5), _pair( 1, 7), _pair( 1, 6), _pair( 0, 15),
- _pair( 0, 14), _pair( 0, 13), _pair( 0, 12), _pair(26, 1),
- _pair(25, 1), _pair(24, 1), _pair(23, 1), _pair(22, 1)
+ _AC2(10, 2), _AC2( 9, 2), _AC2( 5, 3), _AC2( 3, 4),
+ _AC2( 2, 5), _AC2( 1, 7), _AC2( 1, 6), _AC2( 0, 15),
+ _AC2( 0, 14), _AC2( 0, 13), _AC2( 0, 12), _AC2(26, 1),
+ _AC2(25, 1), _AC2(24, 1), _AC2(23, 1), _AC2(22, 1)
},
- .lut10 = {
+ .ac10 = {
// 0000000001 xxxxx
- _pair( 0, 31), _pair( 0, 30), _pair( 0, 29), _pair( 0, 28),
- _pair( 0, 27), _pair( 0, 26), _pair( 0, 25), _pair( 0, 24),
- _pair( 0, 23), _pair( 0, 22), _pair( 0, 21), _pair( 0, 20),
- _pair( 0, 19), _pair( 0, 18), _pair( 0, 17), _pair( 0, 16)
+ _AC2( 0, 31), _AC2( 0, 30), _AC2( 0, 29), _AC2( 0, 28),
+ _AC2( 0, 27), _AC2( 0, 26), _AC2( 0, 25), _AC2( 0, 24),
+ _AC2( 0, 23), _AC2( 0, 22), _AC2( 0, 21), _AC2( 0, 20),
+ _AC2( 0, 19), _AC2( 0, 18), _AC2( 0, 17), _AC2( 0, 16)
},
- .lut11 = {
+ .ac11 = {
// 00000000001 xxxxx
- _pair( 0, 40), _pair( 0, 39), _pair( 0, 38), _pair( 0, 37),
- _pair( 0, 36), _pair( 0, 35), _pair( 0, 34), _pair( 0, 33),
- _pair( 0, 32), _pair( 1, 14), _pair( 1, 13), _pair( 1, 12),
- _pair( 1, 11), _pair( 1, 10), _pair( 1, 9), _pair( 1, 8)
+ _AC2( 0, 40), _AC2( 0, 39), _AC2( 0, 38), _AC2( 0, 37),
+ _AC2( 0, 36), _AC2( 0, 35), _AC2( 0, 34), _AC2( 0, 33),
+ _AC2( 0, 32), _AC2( 1, 14), _AC2( 1, 13), _AC2( 1, 12),
+ _AC2( 1, 11), _AC2( 1, 10), _AC2( 1, 9), _AC2( 1, 8)
},
- .lut12 = {
+ .ac12 = {
// 000000000001 xxxxx
- _pair( 1, 18), _pair( 1, 17), _pair( 1, 16), _pair( 1, 15),
- _pair( 6, 3), _pair(16, 2), _pair(15, 2), _pair(14, 2),
- _pair(13, 2), _pair(12, 2), _pair(11, 2), _pair(31, 1),
- _pair(30, 1), _pair(29, 1), _pair(28, 1), _pair(27, 1)
+ _AC2( 1, 18), _AC2( 1, 17), _AC2( 1, 16), _AC2( 1, 15),
+ _AC2( 6, 3), _AC2(16, 2), _AC2(15, 2), _AC2(14, 2),
+ _AC2(13, 2), _AC2(12, 2), _AC2(11, 2), _AC2(31, 1),
+ _AC2(30, 1), _AC2(29, 1), _AC2(28, 1), _AC2(27, 1)
+ },
+ .dc = {
+ // 00-----
+ _DC4(1, 0), _DC4(1, 0), _DC4(1, 0), _DC4(1, 0),
+ // 01-----
+ _DC4(2, 1), _DC4(2, 1), _DC4(2, 1), _DC4(2, 1),
+ // 100----
+ _DC4(0, 2), _DC4(0, 2),
+ // 101----
+ _DC4(3, 2), _DC4(3, 2),
+ // 110----
+ _DC4(4, 3), _DC4(4, 3),
+ // 1110---
+ _DC4(5, 4),
+ // 11110--
+ _DC3(6, 5),
+ // 111110-
+ _DC2(7, 6),
+ // 1111110
+ _DC(8, 7),
+ // 1111111(0)
+ _DC(0, 8)
+ },
+ .dc_len = {
+ _DC(3, 2), _DC(2, 2), _DC(2, 2), _DC(3, 3),
+ _DC(3, 4), _DC(4, 5), _DC(5, 6), _DC(6, 7),
+ _DC(7, 8)
}
};
@@ -100,7 +133,7 @@ static const DECDCTTAB _default_huffman_table = {
static VLC_Context _default_context;
static size_t _max_buffer_size = 0;
-const DECDCTTAB *_vlc_huffman_table = &_default_huffman_table;
+const VLC_TableV3 *_vlc_huffman_table = &_default_huffman_table;
/* Stateful VLC decoder API (for Sony SDK compatibility) */
@@ -120,10 +153,19 @@ size_t DecDCTvlcSize(size_t size) {
/* Lookup table relocation API */
-void DecDCTvlcCopyTable(DECDCTTAB *addr) {
+void DecDCTvlcCopyTableV2(VLC_TableV2 *addr) {
+ if (addr) {
+ _vlc_huffman_table = (const VLC_TableV3 *) addr;
+ memcpy(addr, &_default_huffman_table, sizeof(VLC_TableV2));
+ } else {
+ _vlc_huffman_table = &_default_huffman_table;
+ }
+}
+
+void DecDCTvlcCopyTableV3(VLC_TableV3 *addr) {
if (addr) {
- _vlc_huffman_table = addr;
- memcpy(addr, &_default_huffman_table, sizeof(DECDCTTAB));
+ _vlc_huffman_table = (const VLC_TableV3 *) addr;
+ memcpy(addr, &_default_huffman_table, sizeof(VLC_TableV3));
} else {
_vlc_huffman_table = &_default_huffman_table;
}
diff --git a/libpsn00b/psxpress/vlc.s b/libpsn00b/psxpress/vlc.s
index f3a1c67..2de22f7 100644
--- a/libpsn00b/psxpress/vlc.s
+++ b/libpsn00b/psxpress/vlc.s
@@ -1,375 +1,576 @@
# PSn00bSDK MDEC library (GTE-accelerated VLC decompressor)
-# (C) 2022 spicyjpeg - MPL licensed
+# (C) 2022-2023 spicyjpeg - MPL licensed
#
-# Register map:
-# - $a0 = ctx
-# - $a1 = output
-# - $a2 = max_size
-# - $a3 = input
-# - $t0 = window
-# - $t1 = next_window
-# - $t2 = remaining
-# - $t3 = quant_scale
-# - $t4 = is_v3
-# - $t5 = bit_offset
-# - $t6 = block_index
-# - $t7 = coeff_index
-# - $t8 = _vlc_huffman_table
-# - $t9 = &ac_jump_area
+# TODO: reduce the size of the v3 DC coefficient decoder; currently the code is
+# duplicated for each block type, but it can probably be shortened with no
+# performance impact...
-.set noreorder
+.include "gtereg.inc"
-.set VLC_Context_input, 0
-.set VLC_Context_window, 4
-.set VLC_Context_next_window, 8
-.set VLC_Context_remaining, 12
-.set VLC_Context_quant_scale, 16
-.set VLC_Context_is_v3, 18
-.set VLC_Context_bit_offset, 19
-.set VLC_Context_block_index, 20
-.set VLC_Context_coeff_index, 21
-
-.set DECDCTTAB_lut0, 0
-.set DECDCTTAB_lut2, 4
-.set DECDCTTAB_lut3, 36
-.set DECDCTTAB_lut4, 292
-.set DECDCTTAB_lut5, 308
-.set DECDCTTAB_lut7, 324
-.set DECDCTTAB_lut8, 356
-.set DECDCTTAB_lut9, 420
-.set DECDCTTAB_lut10, 484
-.set DECDCTTAB_lut11, 548
-.set DECDCTTAB_lut12, 612
+.set noreorder
+.set noat
+
+.set value, $v0
+.set length, $v1
+.set ctx, $a0
+.set output, $a1
+.set max_size, $a2
+.set input, $a3
+.set temp, $t0
+.set window, $t1
+.set next_window, $t2
+.set remaining, $t3
+.set is_v3, $t4
+.set bit_offset, $t5
+.set block_index, $t6
+.set coeff_index, $t7
+.set quant_scale, $s0
+.set last_y, $s1
+.set last_cr, $s2
+.set last_cb, $s3
+.set huffman_table, $t8
+.set ac_jump_area, $t9
+
+.set VLC_Context_input, 0x0
+.set VLC_Context_window, 0x4
+.set VLC_Context_next_window, 0x8
+.set VLC_Context_remaining, 0xc
+.set VLC_Context_is_v3, 0x10
+.set VLC_Context_bit_offset, 0x11
+.set VLC_Context_block_index, 0x12
+.set VLC_Context_coeff_index, 0x13
+.set VLC_Context_quant_scale, 0x14
+.set VLC_Context_last_y, 0x16
+.set VLC_Context_last_cr, 0x18
+.set VLC_Context_last_cb, 0x1a
+
+.set VLC_Table_ac0, 0x0
+.set VLC_Table_ac2, 0x4
+.set VLC_Table_ac3, 0x24
+.set VLC_Table_ac4, 0x124
+.set VLC_Table_ac5, 0x134
+.set VLC_Table_ac7, 0x144
+.set VLC_Table_ac8, 0x164
+.set VLC_Table_ac9, 0x1a4
+.set VLC_Table_ac10, 0x1e4
+.set VLC_Table_ac11, 0x224
+.set VLC_Table_ac12, 0x264
+.set VLC_Table_dc, 0x2a4
+.set VLC_Table_dc_len, 0x324
.section .text.DecDCTvlcStart
.global DecDCTvlcStart
.type DecDCTvlcStart, @function
DecDCTvlcStart:
+ addiu $sp, -16
+ sw $s0, 0($sp)
+ sw $s1, 4($sp)
+ sw $s2, 8($sp)
+ sw $s3, 12($sp)
+
# Create a new context on-the-fly without writing it to memory then jump
# into DecDCTvlcContinue(), skipping context loading.
- lw $t0, 8($a3) # window = (bs->data[0] << 16) | (bs->data[0] >> 16)
- nop
- srl $v0, $t0, 16
- sll $t0, 16
-
- lw $t1, 12($a3) # next_window = (bs->data[1] << 16) | (bs->data[1] >> 16)
- or $t0, $v0
- srl $v0, $t1, 16
- sll $t1, 16
-
- lhu $t2, 0($a3) # remaining = bs->uncomp_length * 2
- or $t1, $v0
-
- lhu $t3, 4($a3) # quant_scale = (bs->quant_scale & 63) << 10
- sll $t2, 1
- andi $t3, 63
-
- lhu $t4, 6($a3) # is_v3 = !(bs->version < 3)
- sll $t3, 10
- sltiu $t4, $t4, 3
- xori $t4, 1
-
- li $t5, 32 # bit_offset = 32
- li $t6, 5 # block_index = 5
- li $t7, 0 # coeff_index = 0
+ lw window, 8(input) # window = (bs->data[0] << 16) | (bs->data[0] >> 16)
+ li last_y, 0
+ srl temp, window, 16
+ sll window, 16
+ or window, temp
+
+ # next_window = (bs->data[1] << 16) | (bs->data[1] >> 16)
+ lw next_window, 12(input)
+ li last_cr, 0
+ srl temp, next_window, 16
+ sll next_window, 16
+ or next_window, temp
+
+ lhu remaining, 0(input) # remaining = bs->uncomp_length * 2
+ li last_cb, 0
+ sll remaining, 1
+
+ lw temp, 4(input) # quant_scale = (bs->quant_scale & 63) << 10
+ li bit_offset, 32
+ andi quant_scale, temp, 63
+ sll quant_scale, 10
+
+ srl temp, 16 # is_v3 = !(bs->version < 3)
+ sltiu is_v3, temp, 3
+ xori is_v3, 1
+
+ li block_index, 5
+ li coeff_index, 0
j _vlc_skip_context_load
- addiu $a3, 16 # input = &(bs->data[2])
+ addiu input, 16 # input = &(bs->data[2])
.section .text.DecDCTvlcContinue
.global DecDCTvlcContinue
.type DecDCTvlcContinue, @function
DecDCTvlcContinue:
- lw $a3, VLC_Context_input($a0)
- lw $t0, VLC_Context_window($a0)
- lw $t1, VLC_Context_next_window($a0)
- lw $t2, VLC_Context_remaining($a0)
- lhu $t3, VLC_Context_quant_scale($a0)
- lb $t4, VLC_Context_is_v3($a0)
- lb $t5, VLC_Context_bit_offset($a0)
- lb $t6, VLC_Context_block_index($a0)
- lb $t7, VLC_Context_coeff_index($a0)
+ addiu $sp, -16
+ sw $s0, 0($sp)
+ sw $s1, 4($sp)
+ sw $s2, 8($sp)
+ sw $s3, 12($sp)
+
+ lw input, VLC_Context_input(ctx)
+ lw window, VLC_Context_window(ctx)
+ lw next_window, VLC_Context_next_window(ctx)
+ lw remaining, VLC_Context_remaining(ctx)
+ lb is_v3, VLC_Context_is_v3(ctx)
+ lb bit_offset, VLC_Context_bit_offset(ctx)
+ lb block_index, VLC_Context_block_index(ctx)
+ lb coeff_index, VLC_Context_coeff_index(ctx)
+ lhu quant_scale, VLC_Context_quant_scale(ctx)
+ lh last_y, VLC_Context_last_y(ctx)
+ lh last_cr, VLC_Context_last_cr(ctx)
+ lh last_cb, VLC_Context_last_cb(ctx)
_vlc_skip_context_load:
- # Determine how many bytes to output. This whole block of code basically
- # does this:
+ # Determine how many bytes to output.
+ # if (max_size <= 0) max_size = 0x3fff0000
# max_size = min((max_size - 1) * 2, remaining)
# remaining -= max_size
- bgtz $a2, .Lmax_size_valid # if (max_size <= 0) max_size = 0x7ffe0000
- addiu $a2, -1 # else max_size = (max_size - 1) * 2
- lui $a2, 0x3fff
+ bgtz max_size, .Lmax_size_valid
+ addiu max_size, -1
+ lui max_size, 0x3fff
.Lmax_size_valid:
- sll $a2, 1
+ sll max_size, 1
- blt $a2, $t2, .Lmax_size_ok # if (max_size > remaining) max_size = remaining
- lui $v1, 0x3800
- move $a2, $t2
-.Lmax_size_ok:
- subu $t2, $a2 # remaining -= max_size
+ subu remaining, max_size
+ bgez remaining, .Lmax_size_ok
+ lui temp, 0x3800
+ addu max_size, remaining
+ li remaining, 0
+
+.Lmax_size_ok:
# Write the length of the data that will be decoded to the first 4 bytes of
# the output buffer, which will then be parsed by DecDCTin().
- srl $v0, $a2, 1 # output[0] = 0x38000000 | (max_size / 2)
- or $v0, $v1
- sw $v0, 0($a1)
+ srl value, max_size, 1 # output[0] = 0x38000000 | (max_size / 2)
+ or value, temp
+ sw value, 0(output)
# Obtain the addresses of the lookup table and jump area in advance so that
# they don't have to be retrieved for each coefficient decoded.
- lw $t8, _vlc_huffman_table
- la $t9, .Lac_prefix_10
+ lw huffman_table, _vlc_huffman_table
+ la ac_jump_area, .Lac_prefix_01 - 32
- beqz $a2, .Lstop_processing
- addiu $a1, 4 # output = (uint16_t *) &output[1]
+ beqz max_size, .Lstop_processing
+ addiu output, 4
.Lprocess_next_code_loop: # while (max_size)
# This is the "hot" part of the decoder, executed for each code in the
# bitstream. The first step is to determine if the next code is a DC or AC
- # coefficient.
- bnez $t7, .Lprocess_ac_coefficient
- addiu $t7, 1 # coeff_index++
- bnez $t4, .Lprocess_dc_v3_coefficient
- li $v1, 0x01ff
+ # coefficient; at the same time the GTE is given the task of counting the
+ # number of leading zeroes/ones in the code (which takes 2 more cycles).
+ mtc2 window, C2_LZCS
+
+ bnez coeff_index, .Lprocess_ac_coefficient
+ addiu coeff_index, 1
+ bnez is_v3, .Lprocess_dc_v3_coefficient
+ li temp, 0x1ff
.Lprocess_dc_v2_coefficient: # if (!coeff_index && !is_v3)
# The DC coefficient in version 2 frames is not compressed. Value 0x1ff is
# used to signal the end of the bitstream.
- srl $v0, $t0, 22 # prefix = (window >> (32 - 10))
- beq $v0, $v1, .Lstop_processing # if (prefix == 0x1ff) break
- or $v0, $t3 # *output = prefix | quant_scale
- sll $t0, 10 # window <<= 10
- b .Lwrite_value
- addiu $t5, -10 # bit_offset -= 10
+ # prefix = window >> (32 - 10)
+ # if (prefix == 0x1ff) break
+ # *output = prefix | quant_scale
+ srl value, window, 22
+ beq value, temp, .Lstop_processing
+ or value, quant_scale
+ sll window, 10
+ addiu bit_offset, -10
+
+ b .Lfeed_bitstream
+ sh value, 0(output)
.Lprocess_dc_v3_coefficient: # if (!coeff_index && is_v3)
- # TODO: version 3 is currently not supported.
- jr $ra
- li $v0, -1
-
-.Lprocess_ac_coefficient: # if (coeff_index)
- # Check whether the prefix code is one of the shorter, more common ones,
- # and start counting the number of leading zeroes/ones using the GTE (which
- # takes 2 more cycles).
- srl $v0, $t0, 30
- li $v1, 3
- beq $v0, $v1, .Lac_prefix_11
- li $v1, 2
- beq $v0, $v1, .Lac_prefix_10
- li $v1, 1
- mtc2 $t0, $30
- beq $v0, $v1, .Lac_prefix_01
+ # Version 3 DC coefficients are variable-length deltas, prefixed with a
+ # Huffman code indicating their length. Since the prefix code is up to 7
+ # bits long, it makes sense to decode it with a simple 128-byte lookup
+ # table rather than using the GTE. The codes are different for luma and
+ # chroma blocks, so each table entry contains the decoded length for both
+ # block types (packed as two nibbles). Prefix 111111111 is used to signal
+ # the end of the bitstream.
+ # prefix = window >> (32 - 9)
+ # if (prefix == 0x1ff) break
+ # lengths = huffman_table->dc[prefix >> 2]
+ srl length, window, 23
+ beq length, temp, .Lstop_processing
+ srl length, 2
+ addu length, huffman_table
+
+ addiu $at, block_index, -4
+ bltz $at, .Ldc_block_y
+ lbu length, VLC_Table_dc(length)
+ beqz $at, .Ldc_block_cb
+ andi length, 15 # if (block_index >= Cb) dc_length = lengths & 15
+
+.Ldc_block_cr: # if (block_index > Cb)
+ # prefix_length = huffman_table->dc_len[dc_length] & 15
+ addu temp, length, huffman_table
+ lbu temp, VLC_Table_dc_len(temp)
+ li $at, 32
+ andi temp, 15
+
+ sllv window, window, temp
+ beqz length, .Ldc_cr_zero # if (dc_length)
+ subu bit_offset, temp
+
+ subu $at, length # value = window >> (32 - dc_length)
+ srlv value, window, $at
+
+ # Decode the sign bit, then add the decoded delta to the current value.
+ # if (!(window >> 31)) value -= (1 << dc_length) - 1
+ bltz window, .Ldc_cr_positive
+ li temp, -1
+ srlv temp, temp, $at
+ subu value, temp
+.Ldc_cr_positive:
+ addu last_cr, value
+ andi last_cr, 0x3ff
+
+.Ldc_cr_zero:
+ sll temp, last_cr, 2 # *output = (last_cr << 2) | quant_scale
+ or temp, quant_scale
+ b .Lupdate_window_dc # update_window(dc_length)
+ sh temp, 0(output)
+
+.Ldc_block_cb: # if (block_index == Cb)
+ # prefix_length = huffman_table->dc_len[dc_length] & 15
+ addu temp, length, huffman_table
+ lbu temp, VLC_Table_dc_len(temp)
+ li $at, 32
+ andi temp, 15
+
+ sllv window, window, temp
+ beqz length, .Ldc_cb_zero # if (dc_length)
+ subu bit_offset, temp
+
+ subu $at, length # value = window >> (32 - dc_length)
+ srlv value, window, $at
+
+ # Decode the sign bit, then add the decoded delta to the current value.
+ # if (!(window >> 31)) value -= (1 << dc_length) - 1
+ bltz window, .Ldc_cb_positive
+ li temp, -1
+ srlv temp, temp, $at
+ subu value, temp
+.Ldc_cb_positive:
+ addu last_cb, value
+ andi last_cb, 0x3ff
+
+.Ldc_cb_zero:
+ sll value, last_cb, 2 # *output = (last_cb << 2) | quant_scale
+ or value, quant_scale
+ b .Lupdate_window_dc # update_window(dc_length)
+ sh value, 0(output)
+
+.Ldc_block_y: # if (block_index < Cb)
nop
+ srl length, 4 # dc_length = lengths >> 4
+
+ # prefix_length = huffman_table->dc_len[dc_length] >> 4
+ addu temp, length, huffman_table
+ lbu temp, VLC_Table_dc_len(temp)
+ li $at, 32
+ srl temp, 4
+
+ sllv window, window, temp
+ beqz length, .Ldc_y_zero # if (dc_length)
+ subu bit_offset, temp
+
+ sll temp, last_y, 2
+ subu $at, length # value = window >> (32 - dc_length)
+ srlv value, window, $at
+
+ # Decode the sign bit, then add the decoded delta to the current value.
+ # if (!(window >> 31)) value -= (1 << dc_length) - 1
+ bltz window, .Ldc_y_positive
+ li temp, -1
+ srlv temp, temp, $at
+ subu value, temp
+.Ldc_y_positive:
+ addu last_y, value
+ andi last_y, 0x3ff
+
+.Ldc_y_zero:
+ sll temp, last_y, 2 # *output = (last_y << 2) | quant_scale
+ or temp, quant_scale
+ b .Lupdate_window_dc # update_window(dc_length)
+ sh temp, 0(output)
- # If the code is longer, retrieve the number of leading zeroes from the GTE
- # and use it as an index into the jump area. Each block in the area is 8
- # instructions long and handles decoding a specific prefix.
- mfc2 $v0, $31
- li $v1, 11
- bgt $v0, $v1, .Lreturn_error # if (prefix > 11) return -1
- sll $v0, 5 # jump_addr = &ac_jump_area[prefix * 8 * sizeof(u32)]
- addu $v0, $t9
- jr $v0
+.Lprocess_ac_coefficient: # if (coeff_index)
+ # Check whether the prefix code is 10 or 11 (i.e. if it starts with 1). If
+ # not, retrieve the number of leading zeroes from the GTE and use it as an
+ # index into the jump area. Each block in the area is 8 instructions long
+ # and handles decoding a specific prefix.
+ mfc2 temp, C2_LZCR
+
+ bltz window, .Lac_prefix_1 # if (!(window >> 31))
+ addiu $at, temp, -11 # if (prefix > 11) return -1
+ bgtz $at, .Lreturn_error
+ sll temp, 5 # jump_addr = &ac_jump_area[prefix * 8 * sizeof(uint32_t)]
+ addu temp, ac_jump_area
+ jr temp
nop
.Lreturn_error:
- jr $ra
+ b .Lreturn
li $v0, -1
-.Lac_prefix_11:
- # Prefix 11 is followed by a single bit.
- srl $v0, $t0, 28 # index = ((window >> (32 - 2 - 1)) & 1) * sizeof(u16)
- andi $v0, 2
- addu $v0, $t8 # value = table->lut0[index]
- lhu $v0, DECDCTTAB_lut0($v0)
- sll $t0, 3 # window <<= 3
- b .Lwrite_value
- addiu $t5, -3 # bit_offset -= 3
- #.word 0
+.Lac_prefix_1: # if (window >> 31)
+ sll window, 1
+ bltz window, .Lac_prefix_11
+ li temp, 0xfe00
.Lac_prefix_10:
# Prefix 10 marks the end of a block.
- li $v0, 0xfe00 # value = 0xfe00
- sll $t0, 2 # window <<= 2
- addiu $t5, -2 # bit_offset -= 2
- addiu $t6, -1 # block_index--
- bgez $t6, .Lwrite_value
- li $t7, 0 # coeff_index = 0
- b .Lwrite_value
- li $t6, 5 # if (block_index < 0) block_index = 5
+ # *output = 0xfe00
+ # coeff_index = 0
+ # if (--block_index < Y3) block_index = Cr
+ sll window, 1
+ addiu bit_offset, -2
+ sh temp, 0(output)
+
+ addiu block_index, -1
+ bgez block_index, .Lfeed_bitstream
+ li coeff_index, 0
+ b .Lfeed_bitstream
+ li block_index, 5
+
+.Lac_prefix_11:
+ # Prefix 11 is followed by a single bit. Note that the 10/11 prefix check
+ # already shifts the window by one bit (without updating the bit offset).
+ # index = ((window >> (32 - 1 - 1)) & 1) * sizeof(uint16_t)
+ # *output = huffman_table->ac0[index]
+ srl value, window, 29
+ andi value, 2
+ addu value, huffman_table
+ lhu value, VLC_Table_ac0(value)
+ sll window, 2
+ addiu bit_offset, -3
+
+ b .Lfeed_bitstream
+ sh value, 0(output)
.Lac_prefix_01:
# Prefix 01 can be followed by a 2-bit lookup index starting with 1, or a
# 3-bit lookup index starting with 0. A 32-bit lookup table is used,
# containing both MDEC codes and lengths.
- srl $v0, $t0, 25 # index = ((window >> (32 - 2 - 3)) & 7) * sizeof(u32)
- andi $v0, 28
- addu $v0, $t8 # value = table->lut2[index]
- lw $v0, DECDCTTAB_lut2($v0)
- b .Lupdate_window_and_write
- srl $v1, $v0, 16 # length = value >> 16
+ # index = ((window >> (32 - 2 - 3)) & 7) * sizeof(uint32_t)
+ # *output = huffman_table->ac2[index] & 0xffff
+ # length = huffman_table->ac2[index] >> 16
+ srl value, window, 25
+ andi value, 28
+ addu value, huffman_table
+ lw value, VLC_Table_ac2(value)
+
+ b .Lupdate_window_ac # update_window(value >> 16)
+ sh value, 0(output)
.word 0, 0
.Lac_prefix_001:
# Prefix 001 can be followed by a 6-bit lookup index starting with 00, or a
# 3-bit lookup index starting with 01/10/11.
- srl $v0, $t0, 21 # index = ((window >> (32 - 3 - 6)) & 63) * sizeof(u32)
- andi $v0, 252
- addu $v0, $t8 # value = table->lut3[index]
- lw $v0, DECDCTTAB_lut3($v0)
- b .Lupdate_window_and_write
- srl $v1, $v0, 16 # length = value >> 16
+ # index = ((window >> (32 - 3 - 6)) & 63) * sizeof(uint32_t)
+ # *output = huffman_table->ac3[index] & 0xffff
+ # length = huffman_table->ac3[index] >> 16
+ srl value, window, 21
+ andi value, 252
+ addu value, huffman_table
+ lw value, VLC_Table_ac3(value)
+
+ b .Lupdate_window_ac # update_window(value >> 16)
+ sh value, 0(output)
.word 0, 0
.Lac_prefix_0001:
# Prefix 0001 is followed by a 3-bit lookup index.
- srl $v0, $t0, 24 # index = ((window >> (32 - 4 - 3)) & 7) * sizeof(u16)
- andi $v0, 14
- addu $v0, $t8 # value = table->lut4[index]
- lhu $v0, DECDCTTAB_lut4($v0)
- sll $t0, 7 # window <<= 4 + 3
- b .Lwrite_value
- addiu $t5, -7 # bit_offset -= 4 + 3
- .word 0
+ # index = ((window >> (32 - 4 - 3)) & 7) * sizeof(uint16_t)
+ # *output = huffman_table->ac4[index]
+ srl value, window, 24
+ andi value, 14
+ addu value, huffman_table
+ lhu value, VLC_Table_ac4(value)
+ sll window, 7
+ addiu bit_offset, -7
+
+ b .Lfeed_bitstream
+ sh value, 0(output)
.Lac_prefix_00001:
# Prefix 00001 is followed by a 3-bit lookup index.
- srl $v0, $t0, 23 # index = ((window >> (32 - 5 - 3)) & 7) * sizeof(u16)
- andi $v0, 14
- addu $v0, $t8 # value = table->lut5[index]
- lhu $v0, DECDCTTAB_lut5($v0)
- sll $t0, 8 # window <<= 5 + 3
- b .Lwrite_value
- addiu $t5, -8 # bit_offset -= 5 + 3
- .word 0
+ # index = ((window >> (32 - 5 - 3)) & 7) * sizeof(uint16_t)
+ # *output = huffman_table->ac5[index]
+ srl value, window, 23
+ andi value, 14
+ addu value, huffman_table
+ lhu value, VLC_Table_ac5(value)
+ sll window, 8
+ addiu bit_offset, -8
+
+ b .Lfeed_bitstream
+ sh value, 0(output)
.Lac_prefix_000001:
# Prefix 000001 is an escape code followed by a full 16-bit MDEC value.
- srl $v0, $t0, 10 # value = window >> (32 - 6 - 16)
- sll $t0, 22 # window <<= 6 + 16
- b .Lwrite_value
- addiu $t5, -22 # bit_offset -= 6 + 16
- .word 0, 0, 0, 0
+ # *output = window >> (32 - 6 - 16)
+ srl value, window, 10
+ sll window, 22
+ addiu bit_offset, -22
+
+ b .Lfeed_bitstream
+ sh value, 0(output)
+ .word 0, 0, 0
.Lac_prefix_0000001:
# Prefix 0000001 is followed by a 4-bit lookup index.
- srl $v0, $t0, 20 # index = ((window >> (32 - 7 - 4)) & 15) * sizeof(u16)
- andi $v0, 30
- addu $v0, $t8 # value = table->lut7[index]
- lhu $v0, DECDCTTAB_lut7($v0)
- sll $t0, 11 # window <<= 7 + 4
- b .Lwrite_value
- addiu $t5, -11 # bit_offset -= 7 + 4
- .word 0
+ # index = ((window >> (32 - 7 - 4)) & 15) * sizeof(uint16_t)
+ # *output = huffman_table->ac7[index]
+ srl value, window, 20
+ andi value, 30
+ addu value, huffman_table
+ lhu value, VLC_Table_ac7(value)
+ sll window, 11
+ addiu bit_offset, -11
+
+ b .Lfeed_bitstream
+ sh value, 0(output)
.Lac_prefix_00000001:
# Prefix 00000001 is followed by a 5-bit lookup index.
- srl $v0, $t0, 18 # index = ((window >> (32 - 8 - 5)) & 31) * sizeof(u16)
- andi $v0, 62
- addu $v0, $t8 # value = table->lut8[index]
- lhu $v0, DECDCTTAB_lut8($v0)
- sll $t0, 13 # window <<= 8 + 5
- b .Lwrite_value
- addiu $t5, -13 # bit_offset -= 8 + 5
- .word 0
+ # index = ((window >> (32 - 8 - 5)) & 31) * sizeof(uint16_t)
+ # *output = huffman_table->ac8[index]
+ srl value, window, 18
+ andi value, 62
+ addu value, huffman_table
+ lhu value, VLC_Table_ac8(value)
+ sll window, 13
+ addiu bit_offset, -13
+
+ b .Lfeed_bitstream
+ sh value, 0(output)
.Lac_prefix_000000001:
# Prefix 000000001 is followed by a 5-bit lookup index.
- srl $v0, $t0, 17 # index = ((window >> (32 - 9 - 5)) & 31) * sizeof(u16)
- andi $v0, 62
- addu $v0, $t8 # value = table->lut9[index]
- lhu $v0, DECDCTTAB_lut9($v0)
- sll $t0, 14 # window <<= 9 + 5
- b .Lwrite_value
- addiu $t5, -14 # bit_offset -= 9 + 5
- .word 0
+ # index = ((window >> (32 - 9 - 5)) & 31) * sizeof(uint16_t)
+ # *output = huffman_table->ac9[index]
+ srl value, window, 17
+ andi value, 62
+ addu value, huffman_table
+ lhu value, VLC_Table_ac9(value)
+ sll window, 14
+ addiu bit_offset, -14
+
+ b .Lfeed_bitstream
+ sh value, 0(output)
.Lac_prefix_0000000001:
# Prefix 0000000001 is followed by a 5-bit lookup index.
- srl $v0, $t0, 16 # index = ((window >> (32 - 10 - 5)) & 31) * sizeof(u16)
- andi $v0, 62
- addu $v0, $t8 # value = table->lut10[index]
- lhu $v0, DECDCTTAB_lut10($v0)
- sll $t0, 15 # window <<= 10 + 5
- b .Lwrite_value
- addiu $t5, -15 # bit_offset -= 10 + 5
- .word 0
+ # index = ((window >> (32 - 10 - 5)) & 31) * sizeof(uint16_t)
+ # *output = huffman_table->ac10[index]
+ srl value, window, 16
+ andi value, 62
+ addu value, huffman_table
+ lhu value, VLC_Table_ac10(value)
+ sll window, 15
+ addiu bit_offset, -15
+
+ b .Lfeed_bitstream
+ sh value, 0(output)
.Lac_prefix_00000000001:
# Prefix 00000000001 is followed by a 5-bit lookup index.
- srl $v0, $t0, 15 # index = ((window >> (32 - 11 - 5)) & 31) * sizeof(u16)
- andi $v0, 62
- addu $v0, $t8 # value = table->lut11[index]
- lhu $v0, DECDCTTAB_lut11($v0)
- sll $t0, 16 # window <<= 11 + 5
- b .Lwrite_value
- addiu $t5, -16 # bit_offset -= 11 + 5
- .word 0
+ # index = ((window >> (32 - 11 - 5)) & 31) * sizeof(uint16_t)
+ # *output = huffman_table->ac11[index]
+ srl value, window, 15
+ andi value, 62
+ addu value, huffman_table
+ lhu value, VLC_Table_ac11(value)
+ sll window, 16
+ addiu bit_offset, -16
+
+ b .Lfeed_bitstream
+ sh value, 0(output)
.Lac_prefix_000000000001:
# Prefix 000000000001 is followed by a 5-bit lookup index.
- srl $v0, $t0, 14 # index = ((window >> (32 - 12 - 5)) & 31) * sizeof(u16)
- andi $v0, 62
- addu $v0, $t8 # value = table->lut12[index]
- lhu $v0, DECDCTTAB_lut12($v0)
- sll $t0, 17 # window <<= 12 + 5
- b .Lwrite_value
- addiu $t5, -17 # bit_offset -= 12 + 5
- .word 0
-
-.Lupdate_window_and_write:
- sllv $t0, $t0, $v1 # window <<= length
- subu $t5, $v1 # bit_offset -= length
-.Lwrite_value:
- sh $v0, 0($a1)
+ # index = ((window >> (32 - 12 - 5)) & 31) * sizeof(uint16_t)
+ # *output = huffman_table->ac12[index]
+ srl value, window, 14
+ andi value, 62
+ addu value, huffman_table
+ lhu value, VLC_Table_ac12(value)
+ sll window, 17
+ addiu bit_offset, -17
+
+ b .Lfeed_bitstream
+ sh value, 0(output)
+
+.Lupdate_window_ac:
+ srl length, value, 16
+.Lupdate_window_dc:
+ sllv window, window, length
+ subu bit_offset, length
+
.Lfeed_bitstream:
# Update the window. This makes sure the next iteration of the loop will be
# able to read up to 32 bits from the bitstream.
- bgez $t5, .Lskip_feeding # if (bit_offset < 0)
- addiu $a2, -1 # max_size--
-
- subu $v0, $0, $t5 # window = next_window << (-bit_offset)
- sllv $t0, $t1, $v0
- lw $t1, 0($a3) # next_window = (*input << 16) | (*input >> 16)
- addiu $t5, 32 # bit_offset += 32
- srl $v0, $t1, 16
- sll $t1, 16
- or $t1, $v0
- addiu $a3, 4 # input++
+ bgez bit_offset, .Lskip_feeding # if (bit_offset < 0)
+ addiu max_size, -1
+
+ subu temp, $0, bit_offset # window = next_window << (-bit_offset)
+ sllv window, next_window, temp
+ lw next_window, 0(input) # next_window = (*input << 16) | (*input >> 16)
+ addiu bit_offset, 32
+ srl temp, next_window, 16
+ sll next_window, 16
+ or next_window, temp
+ addiu input, 4
.Lskip_feeding:
- srlv $v0, $t1, $t5 # window |= next_window >> bit_offset
- or $t0, $v0
+ srlv temp, next_window, bit_offset # window |= next_window >> bit_offset
+ or window, temp
- bnez $a2, .Lprocess_next_code_loop
- addiu $a1, 2 # output++
+ bnez max_size, .Lprocess_next_code_loop
+ addiu output, 2
.Lstop_processing:
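# If remaining = 0, skip flushing the context, pad the output buffer with
# end-of-block codes if necessary and return 0. Otherwise flush the context
# and return 1.
# NOTE(review): is_v3 and quant_scale are not written back below, although
# DecDCTvlcContinue() reloads both from the context and DecDCTvlcStart()
# builds its context in registers only - confirm they are stored elsewhere.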
- beqz $t2, .Lpad_output_buffer
- nop
-
- sw $a3, VLC_Context_input($a0)
- sw $t0, VLC_Context_window($a0)
- sw $t1, VLC_Context_next_window($a0)
- sw $t2, VLC_Context_remaining($a0)
- sh $t3, VLC_Context_quant_scale($a0)
- sb $t4, VLC_Context_is_v3($a0)
- sb $t5, VLC_Context_bit_offset($a0)
- sb $t6, VLC_Context_block_index($a0)
- sb $t7, VLC_Context_coeff_index($a0)
-
- jr $ra
+ beqz remaining, .Lpad_output_buffer
+ li temp, 0xfe00
+
+ sw input, VLC_Context_input(ctx)
+ sw window, VLC_Context_window(ctx)
+ sw next_window, VLC_Context_next_window(ctx)
+ sw remaining, VLC_Context_remaining(ctx)
+ sb bit_offset, VLC_Context_bit_offset(ctx)
+ sb block_index, VLC_Context_block_index(ctx)
+ sb coeff_index, VLC_Context_coeff_index(ctx)
+ sh last_y, VLC_Context_last_y(ctx)
+ sh last_cr, VLC_Context_last_cr(ctx)
+ sh last_cb, VLC_Context_last_cb(ctx)
+
+ b .Lreturn
li $v0, 1
.Lpad_output_buffer:
- beqz $a2, .Lreturn_zero
- li $v0, 0xfe00
-.Lpad_output_buffer_loop: # while (max_size)
- sh $v0, 0($a1) # *output = 0xfe00
- addiu $a2, -1 # max_size--
- bnez $a2, .Lpad_output_buffer_loop
- addiu $a1, 2 # output++
+ beqz max_size, .Lreturn
+ li $v0, 0
-.Lreturn_zero:
+.Lpad_output_buffer_loop: # while (max_size)
+ sh temp, 0(output)
+ addiu max_size, -1
+ bnez max_size, .Lpad_output_buffer_loop
+ addiu output, 2
+
+.Lreturn:
+ lw $s0, 0($sp)
+ lw $s1, 4($sp)
+ lw $s2, 8($sp)
+ lw $s3, 12($sp)
jr $ra
- li $v0, 0
+ addiu $sp, 16
diff --git a/libpsn00b/psxpress/vlc2.c b/libpsn00b/psxpress/vlc2.c
index 9eb99bf..7d9d9f3 100644
--- a/libpsn00b/psxpress/vlc2.c
+++ b/libpsn00b/psxpress/vlc2.c
@@ -63,7 +63,7 @@ static const uint32_t _compressed_table[TABLE_LENGTH] = {
static VLC_Context _default_context;
static size_t _max_buffer_size = 0;
-const DECDCTTAB2 *_vlc_huffman_table2 = 0;
+const DECDCTTAB *_vlc_huffman_table2 = 0;
/* VLC decoder */
@@ -77,14 +77,17 @@ int __attribute__((optimize(3))) DecDCTvlcContinue2(
VLC_Context *ctx, uint32_t *buf, size_t max_size
) {
const uint32_t *input = ctx->input;
- uint32_t remaining = ctx->remaining;
uint32_t window = ctx->window;
uint32_t next_window = ctx->next_window;
- uint16_t quant_scale = ctx->quant_scale;
+ uint32_t remaining = ctx->remaining;
+ int is_v3 = ctx->is_v3;
+ int bit_offset = ctx->bit_offset;
int block_index = ctx->block_index;
int coeff_index = ctx->coeff_index;
- int bit_offset = ctx->bit_offset;
- int is_v3 = ctx->is_v3;
+ uint16_t quant_scale = ctx->quant_scale;
+ int16_t last_y = ctx->last_y;
+ int16_t last_cr = ctx->last_cr;
+ int16_t last_cb = ctx->last_cb;
//if (!_vlc_huffman_table2)
//return -1;
@@ -120,25 +123,22 @@ int __attribute__((optimize(3))) DecDCTvlcContinue2(
*output = (uint16_t) _get_bits_unsigned(22);
_advance_window(22);
} else if (window >> 24) {
- // The first lookup table is for codes that not start with
+ // The first lookup table is for codes that do not start with
// 00000000.
- value = _vlc_huffman_table2->lut[_get_bits_unsigned(13)];
+ value = _vlc_huffman_table2->ac[_get_bits_unsigned(13)];
_advance_window(value >> 16);
*output = (uint16_t) value;
} else {
// If the code starts with 00000000, use the second lookup
// table.
- value = _vlc_huffman_table2->lut00[_get_bits_unsigned(17)];
+ value = _vlc_huffman_table2->ac00[_get_bits_unsigned(17)];
_advance_window(value >> 16);
*output = (uint16_t) value;
}
} else {
- // Parse the DC (first) coefficient for this block. Version 2
- // simply stores the signed 10-bit value as-is, while version 3
- // uses a delta encoding combined with a compression method similar
- // to exp-Golomb.
+ // Parse the DC (first) coefficient for this block.
if (is_v3) {
- // TODO: version 3 is currently not supported.
+ // This implementation does not support version 3.
return -1;
} else {
value = _get_bits_unsigned(10);
@@ -158,7 +158,7 @@ int __attribute__((optimize(3))) DecDCTvlcContinue2(
// time and processes each 16-bit word starting from the MSB, so an
// endianness conversion is necessary to preserve bit order when
// reading 32 bits at a time. Also note that the PS1 CPU is not capable
- // of shifting by more than 31 bits - it will shift by 0 bits instead!
+ // of shifting by >=32 bits - it will shift by (N % 32) bits instead!
if (bit_offset < 0) {
window = next_window << (-bit_offset);
bit_offset += 32;
@@ -176,12 +176,15 @@ int __attribute__((optimize(3))) DecDCTvlcContinue2(
return 0;
ctx->input = input;
- ctx->remaining = remaining;
ctx->window = window;
ctx->next_window = next_window;
+ ctx->remaining = remaining;
+ ctx->bit_offset = bit_offset;
ctx->block_index = block_index;
ctx->coeff_index = coeff_index;
- ctx->bit_offset = bit_offset;
+ ctx->last_y = last_y;
+ ctx->last_cr = last_cr;
+ ctx->last_cb = last_cb;
return 1;
}
@@ -197,21 +200,24 @@ int DecDCTvlcStart2(
return -1;
ctx->input = &input[2];
- ctx->remaining = (header->mdec0_header & 0xffff) * 2;
ctx->window = (input[0] << 16) | (input[0] >> 16);
ctx->next_window = (input[1] << 16) | (input[1] >> 16);
- ctx->quant_scale = (header->quant_scale & 63) << 10;
+ ctx->remaining = (header->mdec0_header & 0xffff) * 2;
+ ctx->is_v3 = (header->version >= 3);
+ ctx->bit_offset = 32;
ctx->block_index = 0;
ctx->coeff_index = 0;
- ctx->bit_offset = 32;
- ctx->is_v3 = (header->version == 3);
+ ctx->quant_scale = (header->quant_scale & 63) << 10;
+ ctx->last_y = 0;
+ ctx->last_cr = 0;
+ ctx->last_cb = 0;
return DecDCTvlcContinue2(ctx, buf, max_size);
}
/* Stateful VLC decoder API (for Sony SDK compatibility) */
-int DecDCTvlc2(const uint32_t *bs, uint32_t *buf, DECDCTTAB2 *table) {
+int DecDCTvlc2(const uint32_t *bs, uint32_t *buf, DECDCTTAB *table) {
if (table)
_vlc_huffman_table2 = table;
@@ -230,7 +236,7 @@ size_t DecDCTvlcSize2(size_t size) {
/* Lookup table decompressor */
-void DecDCTvlcBuild(DECDCTTAB2 *table) {
+void DecDCTvlcBuild(DECDCTTAB *table) {
uint32_t *output = (uint32_t *) table;
_vlc_huffman_table2 = table;
diff --git a/libpsn00b/psxsio/tty.c b/libpsn00b/psxsio/tty.c
index a1b33c8..a88af85 100644
--- a/libpsn00b/psxsio/tty.c
+++ b/libpsn00b/psxsio/tty.c
@@ -8,7 +8,7 @@
* critical section or even from an interrupt handler.
*/
-#include <ioctl.h>
+#include <sys/ioctl.h>
#include <psxapi.h>
#include <psxsio.h>
#include <hwregs_c.h>
@@ -93,8 +93,8 @@ void AddSIO(int baud) {
close(0);
close(1);
- DelDev(_sio_dcb.name);
- AddDev(&_sio_dcb);
+ DelDrv(_sio_dcb.name);
+ AddDrv(&_sio_dcb);
open(_sio_dcb.name, 2);
open(_sio_dcb.name, 1);
}
@@ -102,6 +102,6 @@ void AddSIO(int baud) {
void DelSIO(void) {
SIO_Quit();
- DelDev(_sio_dcb.name);
- AddDummyTty();
+ DelDrv(_sio_dcb.name);
+ add_nullcon_driver();
}
diff --git a/libpsn00b/psxspu/common.c b/libpsn00b/psxspu/common.c
index 45654ad..64c6d1b 100644
--- a/libpsn00b/psxspu/common.c
+++ b/libpsn00b/psxspu/common.c
@@ -1,10 +1,11 @@
/*
* PSn00bSDK SPU library (common functions)
- * (C) 2022 spicyjpeg - MPL licensed
+ * (C) 2022-2023 spicyjpeg - MPL licensed
*/
#include <stdint.h>
#include <assert.h>
+#include <psxetc.h>
#include <psxspu.h>
#include <hwregs_c.h>
@@ -31,7 +32,7 @@ static void _wait_status(uint16_t mask, uint16_t value) {
return;
}
- _sdk_log("status register timeout (0x%04x)\n", SPU_STAT);
+ _sdk_log("timeout, status=0x%04x\n", SPU_STAT);
}
static size_t _dma_transfer(uint32_t *data, size_t length, int write) {
@@ -59,13 +60,14 @@ static size_t _dma_transfer(uint32_t *data, size_t length, int write) {
SPU_CTRL |= ctrl;
_wait_status(0x0030, ctrl);
- DMA_MADR(4) = (uint32_t) data;
+ DMA_MADR(DMA_SPU) = (uint32_t) data;
if (length < DMA_CHUNK_LENGTH)
- DMA_BCR(4) = 0x00010000 | length;
+ DMA_BCR(DMA_SPU) = 0x00010000 | length;
else
- DMA_BCR(4) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16);
+ DMA_BCR(DMA_SPU) = DMA_CHUNK_LENGTH |
+ ((length / DMA_CHUNK_LENGTH) << 16);
- DMA_CHCR(4) = 0x01000200 | write;
+ DMA_CHCR(DMA_SPU) = 0x01000200 | write;
return length;
}
@@ -130,8 +132,8 @@ void SpuInit(void) {
SPU_EXT_VOL_L = 0;
SPU_EXT_VOL_R = 0;
- DMA_DPCR |= 0x000b0000; // Enable DMA4
- DMA_CHCR(4) = 0x00000201; // Stop DMA4
+ SetDMAPriority(DMA_SPU, 3);
+ DMA_CHCR(DMA_SPU) = 0x00000201; // Stop DMA
SPU_DMA_CTRL = 0x0004; // Reset transfer mode
SPU_CTRL = 0xc001; // Enable SPU, DAC, CD audio, disable DMA request
@@ -162,12 +164,18 @@ void SpuInit(void) {
}
size_t SpuRead(uint32_t *data, size_t size) {
+ _sdk_validate_args(data && size, 0);
+
return _dma_transfer(data, size, 0) * 4;
}
size_t SpuWrite(const uint32_t *data, size_t size) {
- if (_transfer_addr < WRITABLE_AREA_ADDR)
+ _sdk_validate_args(data && size, 0);
+
+ if (_transfer_addr < WRITABLE_AREA_ADDR) {
+ _sdk_log("ignoring attempt to write to capture buffers at 0x%05x\n", _transfer_addr);
return 0;
+ }
// I/O transfer mode is not that useful, but whatever.
if (_transfer_mode)
@@ -177,6 +185,8 @@ size_t SpuWrite(const uint32_t *data, size_t size) {
}
size_t SpuWritePartly(const uint32_t *data, size_t size) {
+ //_sdk_validate_args(data && size, 0);
+
size_t _size = SpuWrite(data, size);
_transfer_addr += (_size + 1) / 2;
@@ -188,6 +198,10 @@ SPU_TransferMode SpuSetTransferMode(SPU_TransferMode mode) {
return mode;
}
+SPU_TransferMode SpuGetTransferMode(void) {
+ return _transfer_mode;
+}
+
uint32_t SpuSetTransferStartAddr(uint32_t addr) {
if (addr > 0x7ffff)
return 0;
@@ -196,6 +210,10 @@ uint32_t SpuSetTransferStartAddr(uint32_t addr) {
return addr;
}
+uint32_t SpuGetTransferStartAddr(void) {
+ return _transfer_addr * 8;
+}
+
int SpuIsTransferCompleted(int mode) {
if (!mode)
return ((SPU_STAT >> 10) & 1) ^ 1;