aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author spicyjpeg <thatspicyjpeg@gmail.com> 2022-10-08 12:44:34 +0200
committer spicyjpeg <thatspicyjpeg@gmail.com> 2022-10-08 12:44:34 +0200
commit 82a259240d9c63d4656b9dae0b46a3689840473b (patch)
tree de216269b5f82d67bf8cf3d69ab715d7b0fcd80c
parent 3acc4f93f3b6a96da484aab1f08d61f900085e43 (diff)
download psn00bsdk-82a259240d9c63d4656b9dae0b46a3689840473b.tar.gz
Optimize memset(), add heap usage API, remove _mem_init()
-rw-r--r-- libpsn00b/include/stdlib.h 15
-rw-r--r-- libpsn00b/libc/malloc.c 62
-rw-r--r-- libpsn00b/libc/memset.s 132
-rw-r--r-- libpsn00b/libc/start.c 25
4 files changed, 184 insertions, 50 deletions
diff --git a/libpsn00b/include/stdlib.h b/libpsn00b/include/stdlib.h
index 1888c69..f0753c1 100644
--- a/libpsn00b/include/stdlib.h
+++ b/libpsn00b/include/stdlib.h
@@ -12,6 +12,16 @@
#define RAND_MAX 0x7fff
+/* Structure definitions */
+
+typedef struct _HeapUsage {
+ size_t total; // Bytes between the heap start and the heap limit (the whole region passed to InitHeap())
+ size_t heap; // Bytes between the heap start and the current heap end
+ size_t stack; // Bytes between the current heap end and the heap limit (not yet claimed by the heap)
+ size_t alloc; // Running total of bytes reported through TrackHeapUsage()
+ size_t alloc_max; // Highest value the running total has reached since InitHeap()
+} HeapUsage;
+
/* API */
#ifdef __cplusplus
@@ -33,11 +43,12 @@ long double strtold(const char *nptr, char **endptr);
double strtod(const char *nptr, char **endptr);
float strtof(const char *nptr, char **endptr);
-void _mem_init(size_t ram_size, size_t stack_max_size);
void InitHeap(void *addr, size_t size);
-//int SetHeapSize(size_t size);
void *sbrk(ptrdiff_t incr);
+void TrackHeapUsage(ptrdiff_t alloc_incr);
+void GetHeapUsage(HeapUsage *usage);
+
void *malloc(size_t size);
void *calloc(size_t num, size_t size);
void *realloc(void *ptr, size_t size);
diff --git a/libpsn00b/libc/malloc.c b/libpsn00b/libc/malloc.c
index 9d538cd..acac753 100644
--- a/libpsn00b/libc/malloc.c
+++ b/libpsn00b/libc/malloc.c
@@ -9,6 +9,8 @@
* latter being built on top of the former. This makes it possible to override
* only InitHeap() and sbrk() while still using the default allocator, or
* override malloc()/realloc()/free() while using the default heap manager.
+ * Custom allocators should call TrackHeapUsage() to let the heap manager know
+ * how much memory is allocated at a given time.
*/
#include <stddef.h>
@@ -25,11 +27,13 @@ typedef struct _BlockHeader {
size_t size;
} BlockHeader;
-/* Data */
+/* Internal globals */
static void *_heap_start, *_heap_end, *_heap_limit;
-static void *_alloc_start = 0;
-static BlockHeader *_alloc_head = 0, *_alloc_tail = 0;
+static size_t _heap_alloc, _heap_alloc_max;
+
+static void *_alloc_start;
+static BlockHeader *_alloc_head, *_alloc_tail;
/* Heap management API */
@@ -37,6 +41,13 @@ __attribute__((weak)) void InitHeap(void *addr, size_t size) {
_heap_start = addr;
_heap_end = addr;
_heap_limit = (void *) ((uintptr_t) addr + size);
+
+ _heap_alloc = 0;
+ _heap_alloc_max = 0;
+
+ _alloc_start = addr;
+ _alloc_head = 0;
+ _alloc_tail = 0;
}
__attribute__((weak)) void *sbrk(ptrdiff_t incr) {
@@ -50,6 +61,22 @@ __attribute__((weak)) void *sbrk(ptrdiff_t incr) {
return old_end;
}
+__attribute__((weak)) void TrackHeapUsage(ptrdiff_t alloc_incr) {
+	// Apply the signed delta to the running allocation total; free() reports
+	// released memory by passing a negative value.
+	size_t current = _heap_alloc + alloc_incr;
+
+	_heap_alloc = current;
+
+	// Keep the high-water mark in sync with the new total.
+	if (_heap_alloc_max < current)
+		_heap_alloc_max = current;
+}
+
+// Fills *usage with a snapshot of the heap manager's current state: the
+// distances between the heap start, heap end and heap limit, plus the
+// allocation counters maintained by TrackHeapUsage(). Does nothing if usage
+// is a null pointer.
+__attribute__((weak)) void GetHeapUsage(HeapUsage *usage) {
+	if (!usage)
+		return;
+
+	// Subtracting void pointers directly is a GNU extension; go through
+	// uintptr_t instead, as InitHeap() already does.
+	uintptr_t start = (uintptr_t) _heap_start;
+	uintptr_t end   = (uintptr_t) _heap_end;
+	uintptr_t limit = (uintptr_t) _heap_limit;
+
+	usage->total = limit - start;
+	usage->heap  = end - start;
+	usage->stack = limit - end;
+
+	usage->alloc     = _heap_alloc;
+	usage->alloc_max = _heap_alloc_max;
+}
+
+
/* Memory allocator */
static BlockHeader *_find_fit(BlockHeader *head, size_t size) {
@@ -69,13 +96,16 @@ static BlockHeader *_find_fit(BlockHeader *head, size_t size) {
}
__attribute__((weak)) void *malloc(size_t size) {
+ if (!size)
+ return 0;
+
size_t _size = _align(size + sizeof(BlockHeader), 8);
// Nothing's initialized yet? Let's just initialize the bottom of our heap,
// flag it as allocated.
if (!_alloc_head) {
- if (!_alloc_start)
- _alloc_start = sbrk(0);
+ //if (!_alloc_start)
+ //_alloc_start = sbrk(0);
BlockHeader *new = (BlockHeader *) sbrk(_size);
if (!new)
@@ -89,6 +119,8 @@ __attribute__((weak)) void *malloc(size_t size) {
_alloc_head = new;
_alloc_tail = new;
+
+ TrackHeapUsage(_size);
return ptr;
}
@@ -106,6 +138,8 @@ __attribute__((weak)) void *malloc(size_t size) {
_alloc_head->prev = new;
_alloc_head = new;
+
+ TrackHeapUsage(_size);
return ptr;
}
@@ -122,6 +156,8 @@ __attribute__((weak)) void *malloc(size_t size) {
(new->next)->prev = new;
prev->next = new;
+
+ TrackHeapUsage(_size);
return ptr;
}
@@ -138,6 +174,8 @@ __attribute__((weak)) void *malloc(size_t size) {
_alloc_tail->next = new;
_alloc_tail = new;
+
+ TrackHeapUsage(_size);
return ptr;
}
@@ -153,13 +191,14 @@ __attribute__((weak)) void *realloc(void *ptr, size_t size) {
if (!ptr)
return malloc(size);
- size_t _size = _align(size + sizeof(BlockHeader), 8);
-
+ size_t _size = _align(size + sizeof(BlockHeader), 8);
BlockHeader *prev = (BlockHeader *) ((uintptr_t) ptr - sizeof(BlockHeader));
// New memory block shorter?
if (prev->size >= _size) {
+ TrackHeapUsage(_size - prev->size);
prev->size = _size;
+
if (!prev->next)
sbrk((ptr - sbrk(0)) + _size);
@@ -172,12 +211,14 @@ __attribute__((weak)) void *realloc(void *ptr, size_t size) {
if (!new)
return 0;
+ TrackHeapUsage(_size - prev->size);
prev->size = _size;
return ptr;
}
// Do we have free memory after it?
if (((prev->next)->ptr - ptr) > _size) {
+ TrackHeapUsage(_size - prev->size);
prev->size = _size;
return ptr;
}
@@ -209,11 +250,13 @@ __attribute__((weak)) void free(void *ptr) {
sbrk(-size);
}
+ TrackHeapUsage(-size);
return;
}
// Finding the proper block
BlockHeader *cur = _alloc_head;
+
for (cur = _alloc_head; ptr != cur->ptr; cur = cur->next) {
if (!cur->next)
return;
@@ -221,14 +264,17 @@ __attribute__((weak)) void free(void *ptr) {
if (cur->next) {
// In the middle, just unlink it
- cur->next->prev = cur->prev;
+ (cur->next)->prev = cur->prev;
+ TrackHeapUsage(-(cur->size + sizeof(BlockHeader)));
} else {
// At the end, shrink heap
_alloc_tail = cur->prev;
void *top = sbrk(0);
size_t size = (top - (cur->prev)->ptr) - (cur->prev)->size;
+
sbrk(-size);
+ TrackHeapUsage(-size);
}
(cur->prev)->next = cur->next;
diff --git a/libpsn00b/libc/memset.s b/libpsn00b/libc/memset.s
index b3a3af3..5a1589d 100644
--- a/libpsn00b/libc/memset.s
+++ b/libpsn00b/libc/memset.s
@@ -1,25 +1,117 @@
-# High speed ASM memset implementation by Lameguy64
-#
-# Part of PSn00bSDK
+# PSn00bSDK optimized memset
+# (C) 2022 spicyjpeg - MPL licensed
.set noreorder
-.section .text
-
-# Arguments:
-# a0 - address to buffer
-# a1 - value to set
-# a2 - bytes to set
+.section .text.memset
.global memset
-.type memset,@function
+.type memset, @function
memset:
- move $v0, $a0
- blez $a2, .Lexit
- addi $a2, -1
- sb $a1, 0($a0)
- b memset
- addiu $a0, 1
-.Lexit:
- jr $ra
- nop
- \ No newline at end of file
+ # memset(dest = $a0, ch = $a1, count = $a2): fills count bytes starting at
+ # dest with the low byte of ch and returns dest in $v0. This file is
+ # assembled with .set noreorder, so the instruction following each branch or
+ # jump is its delay slot and always executes.
+ #
+ # If more than 16 bytes have to be written then take the "large" path,
+ # otherwise use the code below.
+ addiu $t0, $a2, -16
+ bgtz $t0, .Llarge_fill
+ move $v0, $a0 # return_value = dest
+
+ # Jump to one of the sb opcodes below. This is basically a cut-down Duff's
+ # device implementation with no looping. The return is placed *after* all 16
+ # stores so that count == 0 lands directly on the jr and writes nothing
+ # (jumping onto a store that doubles as the jr's delay slot would write one
+ # byte at dest - 1 and then fall through into the large path).
+ la $t0, .Lsmall_duff + 0x40 # jump_addr = &small_duff[(16 - count) * 4]
+ sll $t1, $a2, 2
+ subu $t0, $t1
+ addu $a0, $a2 # dest -= 16 - count
+ jr $t0
+ addiu $a0, -16
+
+.Lsmall_duff:
+ sb $a1, 0x0($a0)
+ sb $a1, 0x1($a0)
+ sb $a1, 0x2($a0)
+ sb $a1, 0x3($a0)
+ sb $a1, 0x4($a0)
+ sb $a1, 0x5($a0)
+ sb $a1, 0x6($a0)
+ sb $a1, 0x7($a0)
+ sb $a1, 0x8($a0)
+ sb $a1, 0x9($a0)
+ sb $a1, 0xa($a0)
+ sb $a1, 0xb($a0)
+ sb $a1, 0xc($a0)
+ sb $a1, 0xd($a0)
+ sb $a1, 0xe($a0)
+ sb $a1, 0xf($a0)
+ jr $ra
+ nop
+
+.Llarge_fill:
+ # Initialize fast filling by repeating the fill byte 4 times, so it can be
+ # written 32 bits at a time.
+ andi $a1, 0xff # ch &= 0xff
+ sll $t0, $a1, 8 # ch |= (ch << 8) | (ch << 16) | (ch << 24)
+ or $a1, $t0
+ sll $t0, $a1, 16
+ or $a1, $t0
+
+ # Fill the first 1-4 bytes (here the swr instruction does all the magic)
+ # and update dest and count accordingly.
+ swr $a1, 0($a0)
+ andi $t0, $a0, 3 # t0 = dest % 4
+ addiu $t0, -4 # t0 = -align, where align = 4 - (dest % 4)
+ addu $a2, $t0 # count -= align
+ subu $a0, $t0 # dest += align
+
+ la $t1, .Llarge_duff
+ andi $t2, $a2, 3 # remainder = count % 4
+ subu $a2, $t2 # count -= remainder
+
+.Llarge_fill_loop:
+ # If 128 bytes or more still have to be written, skip calculating the jump
+ # offset and execute the whole block of sw opcodes.
+ addiu $a2, -0x80 # count -= 0x80
+ bgez $a2, .Llarge_duff
+ # No nop here on purpose: the subu below fills the delay slot, and its
+ # result ($t0) is simply left unused when the branch is taken.
+
+ # Jump to one of the sw opcodes below. This is the "full" Duff's device.
+ subu $t0, $t1, $a2 # jump_addr = &large_duff[0x80 - (count + 0x80)]
+ jr $t0
+ addu $a0, $a2 # dest -= 0x80 - (count + 0x80)
+
+.Llarge_duff:
+ sw $a1, 0x00($a0)
+ sw $a1, 0x04($a0)
+ sw $a1, 0x08($a0)
+ sw $a1, 0x0c($a0)
+ sw $a1, 0x10($a0)
+ sw $a1, 0x14($a0)
+ sw $a1, 0x18($a0)
+ sw $a1, 0x1c($a0)
+ sw $a1, 0x20($a0)
+ sw $a1, 0x24($a0)
+ sw $a1, 0x28($a0)
+ sw $a1, 0x2c($a0)
+ sw $a1, 0x30($a0)
+ sw $a1, 0x34($a0)
+ sw $a1, 0x38($a0)
+ sw $a1, 0x3c($a0)
+ sw $a1, 0x40($a0)
+ sw $a1, 0x44($a0)
+ sw $a1, 0x48($a0)
+ sw $a1, 0x4c($a0)
+ sw $a1, 0x50($a0)
+ sw $a1, 0x54($a0)
+ sw $a1, 0x58($a0)
+ sw $a1, 0x5c($a0)
+ sw $a1, 0x60($a0)
+ sw $a1, 0x64($a0)
+ sw $a1, 0x68($a0)
+ sw $a1, 0x6c($a0)
+ sw $a1, 0x70($a0)
+ sw $a1, 0x74($a0)
+ sw $a1, 0x78($a0)
+ sw $a1, 0x7c($a0)
+
+ # Loop again only if count is still positive; the delay slot advances dest
+ # past the block that was just written either way.
+ bgtz $a2, .Llarge_fill_loop
+ addiu $a0, 0x80 # dest += 0x80
+
+ # Fill the remaining 1-4 bytes, using (again) an unaligned store.
+ addu $a0, $t2 # last_byte = dest + remainder - 1
+ jr $ra
+ swl $a1, -1($a0)
diff --git a/libpsn00b/libc/start.c b/libpsn00b/libc/start.c
index 87ac951..9ff09c8 100644
--- a/libpsn00b/libc/start.c
+++ b/libpsn00b/libc/start.c
@@ -21,7 +21,7 @@ const char **__argv;
static const char *_argv_buffer[ARGC_MAX];
static char _arg_string_buffer[132];
-static void _parse_kernel_args() {
+static void _parse_kernel_args(void) {
// Copy the argument string from kernel memory into a private buffer (which
// won't be cleared or deallocated) and trim it at the first newline.
memset(_arg_string_buffer, 0, 132);
@@ -48,7 +48,7 @@ static void _parse_kernel_args() {
}
}
-/* Heap initialization */
+/* Main */
// These are defined by the linker script. Note that these are *NOT* pointers,
// they are virtual symbols whose location matches their value. The simplest
@@ -58,20 +58,6 @@ extern uint8_t __bss_start[];
extern uint8_t _end[];
//extern uint8_t _gp[];
-// This function should not be called manually in most cases. It might be
-// useful though to change the stack size and/or reinitialize the heap on
-// systems that have more than 2 MB of RAM (e.g. emulators, devkits, PS1-based
-// arcade boards).
-void _mem_init(size_t ram_size, size_t stack_max_size) {
- void *exe_end = _end + 4;
- size_t exe_size = (size_t) exe_end - (size_t) __text_start;
- size_t ram_used = (0x10000 + exe_size + stack_max_size) & 0xfffffffc;
-
- InitHeap(exe_end, ram_size - ram_used);
-}
-
-/* Main */
-
extern void (*__CTOR_LIST__[])(void);
extern void (*__DTOR_LIST__[])(void);
@@ -88,10 +74,9 @@ void _start_inner(int32_t override_argc, const char **override_argv) {
for (uint32_t *i = (uint32_t *) __bss_start; i < (uint32_t *) _end; i++)
*i = 0;
- // Initialize the heap, assuming 2 MB of RAM and reserving 128 KB for the
- // stack. Note that _mem_init() can be called again in main() to change
- // these values.
- _mem_init(0x200000, 0x20000);
+ // Initialize the heap and place it after the executable, assuming 2 MB of
+ // RAM. Note that InitHeap() can be called again in main().
+ InitHeap((void *) _end + 4, (void *) 0x801ffff8 - (void *) _end);
if (override_argv) {
__argc = override_argc;