summaryrefslogtreecommitdiff
path: root/libmeidogte
diff options
context:
space:
mode:
Diffstat (limited to 'libmeidogte')
-rw-r--r--libmeidogte/Makefile26
-rw-r--r--libmeidogte/applymatrixlv.s40
-rw-r--r--libmeidogte/compmatrixlv.s100
-rw-r--r--libmeidogte/gtereg.h80
-rw-r--r--libmeidogte/hirotmatrix.c35
-rw-r--r--libmeidogte/hisin.c33
-rw-r--r--libmeidogte/initgeom.s45
-rw-r--r--libmeidogte/inline_s.h227
-rw-r--r--libmeidogte/isin.c34
-rw-r--r--libmeidogte/matrix.c45
-rw-r--r--libmeidogte/meidogte.h170
-rw-r--r--libmeidogte/meidogte_inline.h433
-rw-r--r--libmeidogte/mulmatrix.s74
-rw-r--r--libmeidogte/mulmatrix0.s74
-rw-r--r--libmeidogte/pushpopmatrix.s68
-rw-r--r--libmeidogte/scalematrix.s68
-rw-r--r--libmeidogte/square0.s27
-rw-r--r--libmeidogte/squareroot.s121
-rw-r--r--libmeidogte/vectornormals.s107
19 files changed, 1807 insertions, 0 deletions
diff --git a/libmeidogte/Makefile b/libmeidogte/Makefile
new file mode 100644
index 0000000..938f3c2
--- /dev/null
+++ b/libmeidogte/Makefile
@@ -0,0 +1,26 @@
+include ../Makefile.cfg
+
+# Every .c and .s file in this directory becomes one object in the archive.
+# Simple (:=) assignment so the wildcard is expanded once, at parse time.
+CFILES := $(notdir $(wildcard ./*.c))
+AFILES := $(notdir $(wildcard ./*.s))
+OFILES := $(CFILES:.c=.o) $(AFILES:.s=.o)
+
+TARGET := libmeidogte.a
+
+# These targets are commands, not files; without this a stray file named
+# "all", "clean" or "install" would silently disable them.
+.PHONY: all clean install
+
+all: $(TARGET)
+
+$(TARGET): $(OFILES)
+	$(AR) cr $@ $^
+	$(RANLIB) $@
+
+%.o: %.c
+	$(CC) $(CFLAGS) -I./ -c $< -o $@
+
+%.o: %.s
+	$(AS) $(AFLAGS) -I ./ $< -o $@
+
+# Only removes files this Makefile produced.
+clean:
+	$(RM) $(TARGET) $(OFILES)
+
+# Creates the destination directories first: with a missing lib/ directory a
+# bare cp would create a regular file called "lib" instead.
+install: all
+	mkdir -p $(TOOLCHAIN_PREFIX)/lib $(TOOLCHAIN_PREFIX)/include
+	cp $(TARGET) $(TOOLCHAIN_PREFIX)/lib
+	cp meidogte.h meidogte_inline.h $(TOOLCHAIN_PREFIX)/include
diff --git a/libmeidogte/applymatrixlv.s b/libmeidogte/applymatrixlv.s
new file mode 100644
index 0000000..332a2f8
--- /dev/null
+++ b/libmeidogte/applymatrixlv.s
@@ -0,0 +1,40 @@
+.set noreorder
+
+.include "gtereg.h"
+.include "inline_s.h"
+
+.section .text
+
+
+# ApplyMatrixLV - multiply a vector by a matrix on the GTE.
+#
+# C prototype (meidogte.h):
+#   VECTOR *ApplyMatrixLV(MATRIX *m, VECTOR *v0, VECTOR *v1)
+#
+# a0 - Pointer to matrix (m); only the words at offsets 0-16 are read
+# a1 - Pointer to input vector (v0), three words
+# a2 - Pointer to output vector (v1), three words
+# v0 - Returns a2
+#
+# Overwrites GTE control registers 0-4 and IR1-IR3; uses t0-t2.
+# NOTE(review): input and result both pass through IR1-IR3, which are
+# presumably 16-bit GTE registers - confirm the usable value range.
+.global ApplyMatrixLV
+.type ApplyMatrixLV, @function
+ApplyMatrixLV:
+
+ # Load matrix to GTE
+ lw $t0, 0($a0)
+ lw $t1, 4($a0)
+ ctc2 $t0, $0
+ ctc2 $t1, $1
+ lw $t0, 8($a0)
+ lw $t1, 12($a0)
+ lhu $t2, 16($a0)
+ ctc2 $t0, $2
+ ctc2 $t1, $3
+ ctc2 $t2, $4
+
+ # Load the three input components into IR1-IR3
+ lw $t0, 0($a1)
+ lw $t1, 4($a1)
+ mtc2 $t0, C2_IR1
+ lw $t0, 8($a1)
+ mtc2 $t1, C2_IR2
+ mtc2 $t0, C2_IR3
+
+ # sf=1, mx=0, v=3, cv=3, lm=0 (argument order of nMVMVA in inline_s.h)
+ nMVMVA(1, 0, 3, 3, 0)
+
+ # Write the result back from IR1-IR3
+ swc2 C2_IR1, 0($a2)
+ swc2 C2_IR2, 4($a2)
+ swc2 C2_IR3, 8($a2)
+
+ # Return the output pointer; the move executes in the jr delay slot
+ jr $ra
+ move $v0, $a2
+ \ No newline at end of file
diff --git a/libmeidogte/compmatrixlv.s b/libmeidogte/compmatrixlv.s
new file mode 100644
index 0000000..f613385
--- /dev/null
+++ b/libmeidogte/compmatrixlv.s
@@ -0,0 +1,100 @@
+.set noreorder
+
+.include "gtereg.h"
+.include "inline_s.h"
+
+.set MATRIX_r11r12, 0
+.set MATRIX_r13r21, 4
+.set MATRIX_r22r23, 8
+.set MATRIX_r31r32, 12
+.set MATRIX_r33, 16
+.set MATRIX_trx, 20
+.set MATRIX_try, 24
+.set MATRIX_trz, 28
+
+
+# CompMatrixLV - compose two transformation matrices on the GTE.
+#
+# C prototype (meidogte.h):
+#   MATRIX *CompMatrixLV(MATRIX *v0, MATRIX *v1, MATRIX *v2)
+#
+# a0 - Pointer to first matrix (v0); loaded into the GTE matrix and
+#      translation control registers
+# a1 - Pointer to second matrix (v1)
+# a2 - Pointer to output matrix (v2)
+# v0 - Returns a2
+#
+# The 3x3 part is processed one column of v1 at a time. Element m[row][col]
+# is a halfword at byte offset 2*(col+(3*row)).
+# NOTE(review): the output translation is stored from IR1-IR3, presumably
+# 16-bit registers - confirm this is wide enough for the intended use.
+.global CompMatrixLV
+.type CompMatrixLV, @function
+CompMatrixLV:
+
+ # Load matrix v0 to GTE
+ lw $t0, MATRIX_r11r12($a0)
+ lw $t1, MATRIX_r13r21($a0)
+ ctc2 $t0, C2_R11R12
+ ctc2 $t1, C2_R13R21
+ lw $t0, MATRIX_r22r23($a0)
+ lw $t1, MATRIX_r31r32($a0)
+ lhu $t2, MATRIX_r33($a0)
+ ctc2 $t0, C2_R22R23
+ lw $t0, MATRIX_trx($a0)
+ ctc2 $t1, C2_R31R32
+ lw $t1, MATRIX_try($a0)
+ ctc2 $t2, C2_R33
+ lw $t2, MATRIX_trz($a0)
+ ctc2 $t0, C2_TRX
+ ctc2 $t1, C2_TRY
+ ctc2 $t2, C2_TRZ
+
+ # Feed the translation of v1 through IR1-IR3
+ lw $t0, MATRIX_trx($a1)
+ lw $t1, MATRIX_try($a1)
+ mtc2 $t0, C2_IR1
+ lw $t0, MATRIX_trz($a1)
+ mtc2 $t1, C2_IR2
+ mtc2 $t0, C2_IR3
+
+ # sf=1, mx=0, v=3, cv=0, lm=0 (argument order of nMVMVA in inline_s.h)
+ nMVMVA(1, 0, 3, 0, 0)
+
+ # Store the resulting translation into the output matrix
+ swc2 C2_IR1, MATRIX_trx($a2)
+ swc2 C2_IR2, MATRIX_try($a2)
+ swc2 C2_IR3, MATRIX_trz($a2)
+
+ # Pack column 0 of v1: m[0][0] in the low half, m[1][0] in the high half
+ lhu $t1, 2*(0+(3*1))($a1) # Load values for first
+ lhu $t0, 2*(0+(3*0))($a1) # R11 R21 R31
+ sll $t1, 16
+ or $t0, $t1
+ lhu $t1, 2*(0+(3*2))($a1)
+ mtc2 $t0, C2_VXY0
+ mtc2 $t1, C2_VZ0
+
+ # The two loads below stand in for the nops that the n-prefixed macros
+ # insert before a GTE command
+ lhu $t1, 2*(1+(3*1))($a1) # Load values for second
+ lhu $t0, 2*(1+(3*0))($a1) # R12 R22 R32
+ MVMVA(1, 0, 0, 3, 0) # First multiply
+ sll $t1, 16
+ or $t0, $t1
+ lhu $t1, 2*(1+(3*2))($a1)
+ mtc2 $t0, C2_VXY0
+ mtc2 $t1, C2_VZ0
+
+ mfc2 $t0, C2_IR1 # Store results of first
+ mfc2 $t1, C2_IR2
+ sh $t0, 2*(0+(3*0))($a2)
+ mfc2 $t0, C2_IR3
+ sh $t1, 2*(0+(3*1))($a2)
+ sh $t0, 2*(0+(3*2))($a2)
+
+ lhu $t1, 2*(2+(3*1))($a1) # Load values for third
+ lhu $t0, 2*(2+(3*0))($a1) # R13 R23 R33
+ MVMVA(1, 0, 0, 3, 0) # Second multiply
+ sll $t1, 16
+ or $t0, $t1
+ lhu $t1, 2*(2+(3*2))($a1)
+ mtc2 $t0, C2_VXY0
+ mtc2 $t1, C2_VZ0
+
+ mfc2 $t0, C2_IR1 # Store results of second
+ mfc2 $t1, C2_IR2
+ sh $t0, 2*(1+(3*0))($a2)
+ mfc2 $t0, C2_IR3
+ sh $t1, 2*(1+(3*1))($a2)
+ sh $t0, 2*(1+(3*2))($a2)
+ MVMVA(1, 0, 0, 3, 0) # Third multiply
+
+ mfc2 $t0, C2_IR1 # Store results of third
+ mfc2 $t1, C2_IR2
+ sh $t0, 2*(2+(3*0))($a2)
+ mfc2 $t0, C2_IR3
+ sh $t1, 2*(2+(3*1))($a2)
+ sh $t0, 2*(2+(3*2))($a2)
+
+ # Return the output pointer; the move executes in the jr delay slot
+ jr $ra
+ move $v0, $a2
diff --git a/libmeidogte/gtereg.h b/libmeidogte/gtereg.h
new file mode 100644
index 0000000..5d3391b
--- /dev/null
+++ b/libmeidogte/gtereg.h
@@ -0,0 +1,80 @@
+# GTE register definitions for GNU assembler (as).
+#
+# 2019 Meido-Tek Productions
+
+#
+# GTE data registers (use mfc2, mtc2, lwc2, swc2)
+#
+.set C2_VXY0, $0
+.set C2_VZ0, $1
+.set C2_VXY1, $2
+.set C2_VZ1, $3
+.set C2_VXY2, $4
+.set C2_VZ2, $5
+.set C2_RGB, $6
+.set C2_OTZ, $7
+
+.set C2_IR0, $8
+.set C2_IR1, $9
+.set C2_IR2, $10
+.set C2_IR3, $11
+.set C2_SXY0, $12
+.set C2_SXY1, $13
+.set C2_SXY2, $14
+.set C2_SXYP, $15
+
+.set C2_SZ0, $16
+.set C2_SZ1, $17
+.set C2_SZ2, $18
+.set C2_SZ3, $19
+.set C2_RGB0, $20
+.set C2_RGB1, $21
+.set C2_RGB2, $22
+
+.set C2_MAC0, $24
+.set C2_MAC1, $25
+.set C2_MAC2, $26
+.set C2_MAC3, $27
+.set C2_IRGB, $28
+.set C2_ORGB, $29
+.set C2_LZCS, $30
+.set C2_LZCR, $31
+
+#
+# GTE control registers (use cfc2/ctc2)
+#
+.set C2_R11R12, $0
+.set C2_R13R21, $1
+.set C2_R22R23, $2
+.set C2_R31R32, $3
+.set C2_R33, $4
+.set C2_TRX, $5
+.set C2_TRY, $6
+.set C2_TRZ, $7
+
+.set C2_L11L12, $8
+.set C2_L13L21, $9
+.set C2_L22L23, $10
+.set C2_L31L32, $11
+.set C2_L33, $12
+.set C2_RBK, $13
+.set C2_GBK, $14
+.set C2_BBK, $15
+
+.set C2_LR1LR2, $16
+.set C2_LR3LG1, $17
+.set C2_LG2LG3, $18
+.set C2_LB1LB2, $19
+.set C2_LB3, $20
+.set C2_RFC, $21
+.set C2_GFC, $22
+.set C2_BFC, $23
+
+.set C2_OFX, $24
+.set C2_OFY, $25
+.set C2_H, $26
+.set C2_DQA, $27
+.set C2_DQB, $28
+.set C2_ZSF3, $29
+.set C2_ZSF4, $30
+.set C2_FLAG, $31
diff --git a/libmeidogte/hirotmatrix.c b/libmeidogte/hirotmatrix.c
new file mode 100644
index 0000000..5a252ff
--- /dev/null
+++ b/libmeidogte/hirotmatrix.c
@@ -0,0 +1,35 @@
+#include <meidogte.h>
+
+/*
+ * Build a rotation matrix from the three high-precision angles in r.
+ *
+ * The X, Y and Z axis rotations are built separately and multiplied together
+ * in that order with MulMatrix0(); the product is left in m. Only the 3x3
+ * part of m is written. PushMatrix()/PopMatrix() bracket the multiplies so
+ * the GTE matrix registers hold their previous contents on return.
+ *
+ * r: rotation angles (input)
+ * m: rotation matrix (output)
+ * Returns m.
+ */
+MATRIX *HiRotMatrix(VECTOR *r, MATRIX *m) {
+
+	MATRIX rot_y, rot_z, rot_xy;
+
+	const short sin_x = hisin(r->vx), cos_x = hicos(r->vx);
+	const short sin_y = hisin(r->vy), cos_y = hicos(r->vy);
+	const short sin_z = hisin(r->vz), cos_z = hicos(r->vz);
+
+	// Rotation about X is written straight into the output matrix
+	m->m[0][0] = ONE;
+	m->m[0][1] = 0;
+	m->m[0][2] = 0;
+	m->m[1][0] = 0;
+	m->m[1][1] = cos_x;
+	m->m[1][2] = -sin_x;
+	m->m[2][0] = 0;
+	m->m[2][1] = sin_x;
+	m->m[2][2] = cos_x;
+
+	// Rotation about Y
+	rot_y.m[0][0] = cos_y;
+	rot_y.m[0][1] = 0;
+	rot_y.m[0][2] = sin_y;
+	rot_y.m[1][0] = 0;
+	rot_y.m[1][1] = ONE;
+	rot_y.m[1][2] = 0;
+	rot_y.m[2][0] = -sin_y;
+	rot_y.m[2][1] = 0;
+	rot_y.m[2][2] = cos_y;
+
+	// Rotation about Z
+	rot_z.m[0][0] = cos_z;
+	rot_z.m[0][1] = -sin_z;
+	rot_z.m[0][2] = 0;
+	rot_z.m[1][0] = sin_z;
+	rot_z.m[1][1] = cos_z;
+	rot_z.m[1][2] = 0;
+	rot_z.m[2][0] = 0;
+	rot_z.m[2][1] = 0;
+	rot_z.m[2][2] = ONE;
+
+	PushMatrix();
+
+	MulMatrix0( m, &rot_y, &rot_xy );	// rot_xy = X * Y
+	MulMatrix0( &rot_xy, &rot_z, m );	// m = (X * Y) * Z
+
+	PopMatrix();
+
+	return m;
+
+}
diff --git a/libmeidogte/hisin.c b/libmeidogte/hisin.c
new file mode 100644
index 0000000..df03194
--- /dev/null
+++ b/libmeidogte/hisin.c
@@ -0,0 +1,33 @@
+/* Based on isin_S4 implementation from coranac:
+ * http://www.coranac.com/2009/07/sines/
+ *
+ */
+
+#define qN 15
+#define qA 12
+#define B 19900
+#define C 3516
+
+/*
+ * Fourth-order integer sine approximation (isin_S4).
+ *
+ * x: angle; a quarter turn is 1<<qN units, so a full turn is 4<<qN.
+ * Returns the sine scaled so that 1.0 is 1<<qA.
+ *
+ * The left shifts that push bits into or past the sign bit are performed
+ * on unsigned values: doing them on a signed int is undefined behaviour in
+ * C. Converting back to int and the signed right shift rely on the usual
+ * two's-complement behaviour (as provided by GCC).
+ */
+int hisin(int x) {
+
+	int c, y;
+
+	c= (int)((unsigned int)x<<(30-qN));	// Semi-circle info into sign bit.
+	x -= 1<<qN;				// sine -> cosine calc
+
+	x= (int)((unsigned int)x<<(31-qN));	// Mask with PI
+	x= x>>(31-qN);				// Note: SIGNED shift! (to qN)
+	x= x*x>>(2*qN-14);			// x=x^2 To Q14
+
+	y= B - (x*C>>14);			// B - x^2*C
+	y= (1<<qA)-(x*y>>16);			// A - x^2*(B-x^2*C)
+
+	return c>=0 ? y : -y;
+
+}
+
+/* Cosine through the sine routine: the angle is advanced by a quarter turn. */
+int hicos(int x) {
+
+	const int quarter_turn = 1<<qN;	// 32768
+
+	return hisin( x+quarter_turn );
+
+}
diff --git a/libmeidogte/initgeom.s b/libmeidogte/initgeom.s
new file mode 100644
index 0000000..14ca293
--- /dev/null
+++ b/libmeidogte/initgeom.s
@@ -0,0 +1,45 @@
+.set noreorder
+
+.include "gtereg.h"
+
+.section .text
+
+
+# InitGeom - enable the GTE (coprocessor 2) and load default settings.
+#
+# Takes no arguments and returns nothing. Calls EnterCriticalSection and
+# ExitCriticalSection around the status register update.
+#
+# NOTE(review): only 4 bytes of stack are reserved and $ra is kept at
+# 0($sp). This assumes the two callees never write to the caller's stack
+# area - confirm against their implementation and the calling convention.
+.global InitGeom
+.type InitGeom, @function
+InitGeom:
+ addiu $sp, -4
+ sw $ra, 0($sp)
+
+ jal EnterCriticalSection
+ nop
+
+ mfc0 $v0, $12 # Get SR
+ lui $v1, 0x4000 # Set bit to enable cop2
+ or $v0, $v1
+ mtc0 $v0, $12 # Set new SR
+
+ jal ExitCriticalSection
+ nop
+
+ # $24/$25 are C2_OFX/C2_OFY in gtereg.h
+ ctc2 $0 , $24 # Reset GTE offset
+ ctc2 $0 , $25
+
+ # $26 is C2_H in gtereg.h
+ li $v0, 320 # Set default projection plane
+ ctc2 $v0, $26
+
+ # $29/$30 are C2_ZSF3/C2_ZSF4 in gtereg.h
+ li $v0, 0x155 # Set ZSF3 and ZSF4 defaults
+ ctc2 $v0, $29
+ li $v0, 0x100
+ ctc2 $v0, $30
+
+ li $v0, 0xef9e # DQA and DQB defaults
+ lui $v1, 0x0140
+ ctc2 $v0, C2_DQA
+ ctc2 $v1, C2_DQB
+
+ # Restore the return address and release the stack slot
+ lw $ra, 0($sp)
+ addiu $sp, 4
+ jr $ra
+ nop
+
diff --git a/libmeidogte/inline_s.h b/libmeidogte/inline_s.h
new file mode 100644
index 0000000..08e5c38
--- /dev/null
+++ b/libmeidogte/inline_s.h
@@ -0,0 +1,227 @@
+# Inline GTE macros for GNU assembler (as).
+#
+# 2019 Meido-Tek Productions
+#
+
+.macro nRTPS
+ nop
+ nop
+ cop2 0x0180001
+.endm
+
+.macro nRTPT
+ nop
+ nop
+ cop2 0x0280030
+.endm
+
+.macro nNCLIP
+ nop
+ nop
+ cop2 0x1400006
+.endm
+
+.macro nAVSZ3
+ nop
+ nop
+ cop2 0x158002D
+.endm
+
+.macro nAVSZ4
+ nop
+ nop
+ cop2 0x168002E
+.endm
+
+# nMVMVA - issue an MVMVA command preceded by two nops.
+# The five arguments are OR-ed into the command word at bit positions
+# 19 (sf), 17 (mx), 15 (v), 13 (cv) and 10 (lm).
+.macro nMVMVA sf mx v cv lm
+ nop
+ nop
+ cop2 0x0400012|(\sf<<19)|(\mx<<17)|(\v<<15)|(\cv<<13)|(\lm<<10)
+.endm
+
+.macro nSQR sf
+ nop
+ nop
+ cop2 0x0A00428|(\sf<<19)
+.endm
+
+.macro nnOP sf lm # extra n to prevent conflict with the nop opcode
+ nop
+ nop
+ cop2 0x170000C|(\sf<<19)|(\lm<<10)
+.endm
+
+.macro nNCS
+ nop
+ nop
+ cop2 0x0C8041E
+.endm
+
+.macro nNCT
+ nop
+ nop
+ cop2 0x0D80420
+.endm
+
+.macro nNCCS
+ nop
+ nop
+ cop2 0x108041B
+.endm
+
+.macro nNCCT
+ nop
+ nop
+ cop2 0x118043F
+.endm
+
+.macro nNCDS
+ nop
+ nop
+ cop2 0x0E80413
+.endm
+
+.macro nNCDT
+ nop
+ nop
+ cop2 0x0F80416
+.endm
+
+.macro nCC
+ nop
+ nop
+ cop2 0x138041C
+.endm
+
+.macro nCDP
+ nop
+ nop
+ cop2 0x1280414
+.endm
+
+.macro nDCPL
+ nop
+ nop
+ cop2 0x0680029
+.endm
+
+.macro nDPCS
+ nop
+ nop
+ cop2 0x0780010
+.endm
+
+# nDPCT - issue the DPCT command (GTE function 0x2A) preceded by two nops.
+# The previous command word, 0x0180001, was a copy of the one used by nRTPS
+# and therefore executed RTPS instead.
+.macro nDPCT
+ nop
+ nop
+ cop2 0x0F8002A
+.endm
+
+.macro nINTPL
+ nop
+ nop
+ cop2 0x0980011
+.endm
+
+.macro nGPF sf
+ nop
+ nop
+ cop2 0x190003D|(\sf<<19)
+.endm
+
+.macro nGPL sf
+ nop
+ nop
+ cop2 0x1A0003E|(\sf<<19)
+.endm
+
+#
+# Macros without leading nops (for optimized usage)
+#
+.macro RTPS
+ cop2 0x0180001
+.endm
+
+.macro RTPT
+ cop2 0x0280030
+.endm
+
+.macro NCLIP
+ cop2 0x1400006
+.endm
+
+.macro AVSZ3
+ cop2 0x158002D
+.endm
+
+.macro AVSZ4
+ cop2 0x168002E
+.endm
+
+# MVMVA - issue an MVMVA command without leading nops; the caller must place
+# its own instructions between the last GTE register write and this macro.
+# The five arguments are OR-ed into the command word at bit positions
+# 19 (sf), 17 (mx), 15 (v), 13 (cv) and 10 (lm).
+.macro MVMVA sf mx v cv lm
+ cop2 0x0400012|(\sf<<19)|(\mx<<17)|(\v<<15)|(\cv<<13)|(\lm<<10)
+.endm
+
+.macro SQR sf
+ cop2 0x0A00428|(\sf<<19)
+.endm
+
+.macro OP sf lm
+ cop2 0x170000C|(\sf<<19)|(\lm<<10)
+.endm
+
+.macro NCS
+ cop2 0x0C8041E
+.endm
+
+.macro NCT
+ cop2 0x0D80420
+.endm
+
+.macro NCCS
+ cop2 0x108041B
+.endm
+
+.macro NCCT
+ cop2 0x118043F
+.endm
+
+.macro NCDS
+ cop2 0x0E80413
+.endm
+
+.macro NCDT
+ cop2 0x0F80416
+.endm
+
+.macro CC
+ cop2 0x138041C
+.endm
+
+.macro CDP
+ cop2 0x1280414
+.endm
+
+.macro DCPL
+ cop2 0x0680029
+.endm
+
+.macro DPCS
+ cop2 0x0780010
+.endm
+
+# DPCT - issue the DPCT command (GTE function 0x2A) without leading nops.
+# The previous command word, 0x0180001, was a copy of the one used by RTPS
+# and therefore executed RTPS instead.
+.macro DPCT
+ cop2 0x0F8002A
+.endm
+
+.macro INTPL
+ cop2 0x0980011
+.endm
+
+.macro GPF sf
+ cop2 0x190003D|(\sf<<19)
+.endm
+
+.macro GPL sf
+ cop2 0x1A0003E|(\sf<<19)
+.endm
diff --git a/libmeidogte/isin.c b/libmeidogte/isin.c
new file mode 100644
index 0000000..3641efd
--- /dev/null
+++ b/libmeidogte/isin.c
@@ -0,0 +1,34 @@
+/* Based on isin_S4 implementation from coranac:
+ * http://www.coranac.com/2009/07/sines/
+ *
+ */
+
+#define qN 10
+#define qA 12
+#define B 19900
+#define C 3516
+
+/*
+ * Fourth-order integer sine approximation (isin_S4).
+ *
+ * x: angle; a quarter turn is 1<<qN units, so a full turn is 4<<qN.
+ * Returns the sine scaled so that 1.0 is 1<<qA.
+ *
+ * The left shifts that push bits into or past the sign bit are performed
+ * on unsigned values: doing them on a signed int is undefined behaviour in
+ * C. Converting back to int and the signed right shift rely on the usual
+ * two's-complement behaviour (as provided by GCC).
+ */
+int isin(int x) {
+
+	int c, y;
+
+	c= (int)((unsigned int)x<<(30-qN));	// Semi-circle info into sign bit.
+	x -= 1<<qN;				// sine -> cosine calc
+
+	x= (int)((unsigned int)x<<(31-qN));	// Mask with PI
+	x= x>>(31-qN);				// Note: SIGNED shift! (to qN)
+
+	x= x*x>>(2*qN-14);			// x=x^2 To Q14
+
+	y= B - (x*C>>14);			// B - x^2*C
+	y= (1<<qA)-(x*y>>16);			// A - x^2*(B-x^2*C)
+
+	return c>=0 ? y : -y;
+
+}
+
+/* Cosine through the sine routine: the angle is advanced by a quarter turn. */
+int icos(int x) {
+
+	const int quarter_turn = 1<<qN;	// 1024
+
+	return isin( x+quarter_turn );
+
+}
diff --git a/libmeidogte/matrix.c b/libmeidogte/matrix.c
new file mode 100644
index 0000000..1c226e1
--- /dev/null
+++ b/libmeidogte/matrix.c
@@ -0,0 +1,45 @@
+#include <meidogte.h>
+
+/*
+ * Build a rotation matrix from the three angles in r.
+ *
+ * The X, Y and Z axis rotations are built separately and multiplied together
+ * in that order with MulMatrix0(); the product is left in m. Only the 3x3
+ * part of m is written. PushMatrix()/PopMatrix() bracket the multiplies so
+ * the GTE matrix registers hold their previous contents on return.
+ *
+ * r: rotation angles (input)
+ * m: rotation matrix (output)
+ * Returns m.
+ */
+MATRIX *RotMatrix(SVECTOR *r, MATRIX *m) {
+
+	MATRIX rot_y, rot_z, rot_xy;
+
+	const short sin_x = isin(r->vx), cos_x = icos(r->vx);
+	const short sin_y = isin(r->vy), cos_y = icos(r->vy);
+	const short sin_z = isin(r->vz), cos_z = icos(r->vz);
+
+	// Rotation about X is written straight into the output matrix
+	m->m[0][0] = ONE;
+	m->m[0][1] = 0;
+	m->m[0][2] = 0;
+	m->m[1][0] = 0;
+	m->m[1][1] = cos_x;
+	m->m[1][2] = -sin_x;
+	m->m[2][0] = 0;
+	m->m[2][1] = sin_x;
+	m->m[2][2] = cos_x;
+
+	// Rotation about Y
+	rot_y.m[0][0] = cos_y;
+	rot_y.m[0][1] = 0;
+	rot_y.m[0][2] = sin_y;
+	rot_y.m[1][0] = 0;
+	rot_y.m[1][1] = ONE;
+	rot_y.m[1][2] = 0;
+	rot_y.m[2][0] = -sin_y;
+	rot_y.m[2][1] = 0;
+	rot_y.m[2][2] = cos_y;
+
+	// Rotation about Z
+	rot_z.m[0][0] = cos_z;
+	rot_z.m[0][1] = -sin_z;
+	rot_z.m[0][2] = 0;
+	rot_z.m[1][0] = sin_z;
+	rot_z.m[1][1] = cos_z;
+	rot_z.m[1][2] = 0;
+	rot_z.m[2][0] = 0;
+	rot_z.m[2][1] = 0;
+	rot_z.m[2][2] = ONE;
+
+	PushMatrix();
+
+	MulMatrix0( m, &rot_y, &rot_xy );	// rot_xy = X * Y
+	MulMatrix0( &rot_xy, &rot_z, m );	// m = (X * Y) * Z
+
+	PopMatrix();
+
+	return m;
+
+}
+
+/*
+ * Copy the vector r into the translation part of m; the 3x3 part of m is
+ * left untouched. Returns m.
+ */
+MATRIX *TransMatrix(MATRIX *m, VECTOR *r) {
+
+	int *translation = m->t;
+
+	translation[0] = r->vx;
+	translation[1] = r->vy;
+	translation[2] = r->vz;
+
+	return m;
+
+}
diff --git a/libmeidogte/meidogte.h b/libmeidogte/meidogte.h
new file mode 100644
index 0000000..3953701
--- /dev/null
+++ b/libmeidogte/meidogte.h
@@ -0,0 +1,170 @@
+#ifndef _MEIDOGTE_H
+#define _MEIDOGTE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <meidogte_inline.h>
+
+/**
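+ * Fixed-point representation of 1.0 (12 fractional bits). Matrix elements
+ * and the results of isin()/icos()/hisin()/hicos() use this scale.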
+ */
+#define ONE 4096
+
+
+typedef struct {
+ short m[3][3];
+ int t[3];
+} MATRIX;
+
+typedef struct {
+ int vx, vy, vz;
+} VECTOR;
+
+typedef struct {
+ short vx, vy, vz, pad;
+} SVECTOR;
+
+typedef struct {
+ unsigned char r, g, b, cd;
+} CVECTOR;
+
+/**
+ * Initialize MeidoGTE library
+ */
+
+void InitGeom();
+
+/**
+ * Integer sine function (4096 = 360 degrees)
+ * @param a Input
+ * @return Sine of input
+ */
+int isin(int a);
+
+/**
+ * Integer cosine function (4096 = 360 degrees)
+ * @param a Input
+ * @return Cosine of input
+ */
+int icos(int a);
+
+/**
+ * Higher precision integer sine function (131072 = 360 degrees)
+ * @param a Input
+ * @return Sine of input
+ */
+int hisin(int a);
+/**
+ * Higher precision integer cosine function (131072 = 360 degrees)
+ * @param a Input
+ * @return Cosine of input
+ */
+int hicos(int a);
+
+/**
+ * Save the GTE's current rotation matrix and translation vector. There is a
+ * single save slot, so a second push overwrites the first.
+ */
+void PushMatrix();
+
+/**
+ * Restore the GTE's rotation matrix and translation vector from the slot
+ * written by PushMatrix().
+ */
+void PopMatrix();
+
+/**
+ * Find rotation matrix from a rotation angle. (4096 = 360 degrees)
+ * @param r Rotation angle (input)
+ * @param m Rotation matrix (output)
+ * @return Pointer to m
+ */
+
+MATRIX *RotMatrix(SVECTOR *r, MATRIX *m);
+
+/**
+ * Find rotation matrix from a rotation angle. (high-precision) (131072 = 360 degrees)
+ * @param r Rotation angle (input)
+ * @param m Rotation matrix (output)
+ * @return Pointer to m
+ */
+MATRIX *HiRotMatrix(VECTOR *r, MATRIX *m);
+
+/**
+ * Set the translation part of the matrix m to the vector r.
+ * @param m Pointer to matrix (output)
+ * @param r Pointer to translation vector (input)
+ * @return Pointer to m
+ */
+MATRIX *TransMatrix(MATRIX *m, VECTOR *r);
+/**
+ * Scale m by s.
+ * @param m Pointer to matrix (output)
+ * @param s Pointer to scale vector (input)
+ * @return Pointer to m
+ */
+MATRIX *ScaleMatrix(MATRIX *m, VECTOR *s);
+
+/**
+ * Multiply two matrices.
+ * @param m0 First matrix (result is saved here)
+ * @param m1 Second matrix
+ * @return Pointer to m0.
+ */
+MATRIX *MulMatrix(MATRIX *m0, MATRIX *m1);
+/**
+ * Multiply two matrices.
+ * @param m0 First matrix
+ * @param m1 Second matrix
+ * @param m2 Output matrix
+ * @return Pointer to m2
+ */
+MATRIX *MulMatrix0(MATRIX *m0, MATRIX *m1, MATRIX *m2);
+/**
+ * Make a composite coordinate transformation matrix.
+ * @param v0 First matrix
+ * @param v1 Second matrix
+ * @param v2 Output matrix
+ * @return Pointer to m2
+ */
+MATRIX *CompMatrixLV(MATRIX *v0, MATRIX *v1, MATRIX *v2);
+/**
+ * Multiply a vector by a matrix.
+ * @param m Pointer to matrix to be multiplied
+ * @param v0 Pointer to vector (input)
+ * @param v1 Pointer to vector (output)
+ * @return Pointer to v1
+ */
+VECTOR *ApplyMatrixLV(MATRIX *m, VECTOR *v0, VECTOR *v1);
+/**
+ * Normalize a vector.
+ * Warning: if ((v0->vx)^2 + (v0->vy)^2 + (v0->vz)^2) > 0x7FFFFFF,
+ * a processor exception will occur.
+ * @param v0 Pointer to vector (input)
+ * @param v1 Pointer to vector (output)
+ */
+void VectorNormalS(VECTOR *v0, SVECTOR *v1);
+/**
+ * Return a vector, obtained by squaring each term of the vector v0, to v1.
+ * @param v0 Pointer to vector (input)
+ * @param v1 Pointer to vector (output)
+ */
+void Square0(VECTOR *v0, VECTOR *v1);
+/**
+ * Square root
+ * @param a Input value
+ * @return Square root of input value
+ */
+int SquareRoot0(int a);
+/**
+ * Square root
+ * @param a Input value in (0, 20, 12) format
+ * @return Square root of input value in (0, 20, 12) format
+ */
+int SquareRoot12(int a);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _MEIDOGTE_H
diff --git a/libmeidogte/meidogte_inline.h b/libmeidogte/meidogte_inline.h
new file mode 100644
index 0000000..ab03702
--- /dev/null
+++ b/libmeidogte/meidogte_inline.h
@@ -0,0 +1,433 @@
+/* Inline GTE macros for the GNU C compiler.
+ *
+ * 2019 Meido-Tek Production
+ *
+ *
+ *
+ * Todo: A couple of GTE operation macros are still missing such as
+ * gte_rtv*() though they appear to be just variants of gte_mvmva more or
+ * less (gte_rtv0() is actually gte_mvmva(1, 0, 0, 3, 0) for example).
+ *
+ */
+
+#ifndef _MEIDOGTE_INLINE_C_H
+#define _MEIDOGTE_INLINE_C_H
+
+/**
+ * GTE load macros
+ */
+
+/**
+ * Load a SVECTOR (passed as a pointer) to GTE V0
+ */
+#define gte_ldv0( r0 ) __asm__ volatile ( \
+ "lwc2 $0 , 0( %0 );" \
+ "lwc2 $1 , 4( %0 );" \
+ : \
+ : "r"( r0 ) \
+ : "$t0" )
+
+/**
+ * Load a SVECTOR (passed as a pointer) to GTE V1
+ */
+#define gte_ldv1( r0 ) __asm__ volatile ( \
+ "lwc2 $2 , 0( %0 );" \
+ "lwc2 $3 , 4( %0 );" \
+ : \
+ : "r"( r0 ) \
+ : "$t0" )
+
+/**
+ * Load a SVECTOR (passed as a pointer) to GTE V2
+ */
+#define gte_ldv2( r0 ) __asm__ volatile ( \
+ "lwc2 $4 , 0( %0 );" \
+ "lwc2 $5 , 4( %0 );" \
+ : \
+ : "r"( r0 ) \
+ : "$t0" )
+
+/**
+ * Load three SVECTORs (passed as a pointer) to the GTE at once
+ */
+#define gte_ldv3( r0, r1, r2 ) __asm__ volatile ( \
+ "lwc2 $0 , 0( %0 );" \
+ "lwc2 $1 , 4( %0 );" \
+ "lwc2 $2 , 0( %1 );" \
+ "lwc2 $3 , 4( %1 );" \
+ "lwc2 $4 , 0( %2 );" \
+ "lwc2 $5 , 4( %2 );" \
+ : \
+ : "r"( r0 ), "r"( r1 ), "r"( r2 ) )
+
+#define gte_ldrgb( r0 ) __asm__ volatile ( \
+ "lwc2 $6 , 0( %0 );" \
+ : \
+ : "r"( r0 ) )
+
+#define gte_ldopv2( r0 ) __asm__ volatile ( \
+ "lwc2 $11, 8( %0 );" \
+ "lwc2 $9 , 0( %0 );" \
+ "lwc2 $10, 4( %0 );" \
+ : \
+ : "r"( r0 ) )
+
+/**
+ * Sets the GTE offset
+ */
+#define gte_SetGeomOffset( r0, r1 ) __asm__ volatile ( \
+ "sll $t0, %0, 16;" \
+ "sll $t1, %1, 16;" \
+ "ctc2 $t0, $24;" \
+ "ctc2 $t1, $25;" \
+ : \
+ : "r"( r0 ), "r"( r1 ) \
+ : "$t0", "$t1" )
+
+#define gte_SetGeomScreen( r0 ) __asm__ volatile ( \
+ "ctc2 %0, $26;" \
+ : \
+ : "r"( r0 ) )
+
+/**
+ * Load the translation vector of a MATRIX (passed as a pointer) into GTE
+ * control registers 5-7.
+ *
+ * All three scratch registers are declared as clobbers so the compiler
+ * never keeps the pointer operand (or any live value) in one of them.
+ */
+#define gte_SetTransMatrix( r0 ) __asm__ volatile ( \
+ "lw $t0, 20( %0 );" \
+ "lw $t1, 24( %0 );" \
+ "ctc2 $t0, $5;" \
+ "lw $t2, 28( %0 );" \
+ "ctc2 $t1, $6;" \
+ "ctc2 $t2, $7;" \
+ : \
+ : "r"( r0 ) \
+ : "$t0", "$t1", "$t2" )
+
+/**
+ * Load the 3x3 part of a MATRIX (passed as a pointer) into GTE control
+ * registers 0-4.
+ *
+ * All three scratch registers are declared as clobbers so the compiler
+ * never keeps the pointer operand (or any live value) in one of them.
+ */
+#define gte_SetRotMatrix( r0 ) __asm__ volatile ( \
+ "lw $t0, 0( %0 );" \
+ "lw $t1, 4( %0 );" \
+ "ctc2 $t0, $0;" \
+ "ctc2 $t1, $1;" \
+ "lw $t0, 8( %0 );" \
+ "lw $t1, 12( %0 );" \
+ "lhu $t2, 16( %0 );" \
+ "ctc2 $t0, $2;" \
+ "ctc2 $t1, $3;" \
+ "ctc2 $t2, $4;" \
+ : \
+ : "r"( r0 ) \
+ : "$t0", "$t1", "$t2" )
+
+/**
+ * Load the 3x3 part of a MATRIX (passed as a pointer) into GTE control
+ * registers 8-12.
+ *
+ * All three scratch registers are declared as clobbers so the compiler
+ * never keeps the pointer operand (or any live value) in one of them.
+ */
+#define gte_SetLightMatrix( r0 ) __asm__ volatile ( \
+ "lw $t0, 0( %0 );" \
+ "lw $t1, 4( %0 );" \
+ "ctc2 $t0, $8;" \
+ "ctc2 $t1, $9;" \
+ "lw $t0, 8( %0 );" \
+ "lw $t1, 12( %0 );" \
+ "lhu $t2, 16( %0 );" \
+ "ctc2 $t0, $10;" \
+ "ctc2 $t1, $11;" \
+ "ctc2 $t2, $12;" \
+ : \
+ : "r"( r0 ) \
+ : "$t0", "$t1", "$t2" )
+
+/**
+ * Load the 3x3 part of a MATRIX (passed as a pointer) into GTE control
+ * registers 16-20.
+ *
+ * All three scratch registers are declared as clobbers so the compiler
+ * never keeps the pointer operand (or any live value) in one of them.
+ */
+#define gte_SetColorMatrix( r0 ) __asm__ volatile ( \
+ "lw $t0, 0( %0 );" \
+ "lw $t1, 4( %0 );" \
+ "ctc2 $t0, $16;" \
+ "ctc2 $t1, $17;" \
+ "lw $t0, 8( %0 );" \
+ "lw $t1, 12( %0 );" \
+ "lhu $t2, 16( %0 );" \
+ "ctc2 $t0, $18;" \
+ "ctc2 $t1, $19;" \
+ "ctc2 $t2, $20;" \
+ : \
+ : "r"( r0 ) \
+ : "$t0", "$t1", "$t2" )
+
+#define gte_SetBackColor( r0, r1, r2 ) __asm__ volatile ( \
+ "sll $t0, %0, 4;" \
+ "sll $t1, %1, 4;" \
+ "sll $t2, %2, 4;" \
+ "ctc2 $t0, $13;" \
+ "ctc2 $t1, $14;" \
+ "ctc2 $t2, $15;" \
+ : \
+ : "r"( r0 ), "r"( r1 ), "r"( r2 ) \
+ : "$t0", "$t1", "$t2" )
+
+/**
+ * GTE store macros
+ */
+
+#define gte_otz( r0 ) __asm__ volatile ( \
+ "swc2 $7, 0( %0 );" \
+ : \
+ : "r"( r0 ) \
+ : "memory" )
+
+/**
+ * Store GTE control register 31 to the word pointed to by r0.
+ * $t0 is used as scratch and is declared as a clobber so the compiler does
+ * not keep the pointer operand (or any live value) in it.
+ */
+#define gte_stflg( r0 ) __asm__ volatile ( \
+ "cfc2 $t0, $31;" \
+ "nop;" \
+ "sw $t0, 0( %0 );" \
+ : \
+ : "r"( r0 ) \
+ : "$t0", "memory" )
+
+#define gte_stsxy( r0 ) __asm__ volatile ( \
+ "swc2 $14, 0( %0 );" \
+ : \
+ : "r"( r0 ) \
+ : "memory" )
+
+#define gte_stsxy0( r0 ) __asm__ volatile ( \
+ "swc2 $12, 0( %0 );" \
+ : \
+ : "r"( r0 ) \
+ : "memory" )
+
+#define gte_stsxy1( r0 ) __asm__ volatile ( \
+ "swc2 $13, 0( %0 );" \
+ : \
+ : "r"( r0 ) \
+ : "memory" )
+
+#define gte_stsxy2( r0 ) __asm__ volatile ( \
+ "swc2 $14, 0( %0 );" \
+ : \
+ : "r"( r0 ) \
+ : "memory" )
+
+#define gte_stsxy3( r0, r1, r2 ) __asm__ volatile ( \
+ "swc2 $12, 0( %0 );" \
+ "swc2 $13, 0( %1 );" \
+ "swc2 $14, 0( %2 );" \
+ : \
+ : "r"( r0 ), "r"( r1 ), "r"( r2 ) \
+ : "memory" )
+
+#define gte_stotz( r0 ) __asm__ volatile ( \
+ "swc2 $7, 0( %0 );" \
+ : \
+ : "r"( r0 ) \
+ : "memory" )
+
+#define gte_stopz( r0 ) __asm__ volatile ( \
+ "swc2 $24, 0( %0 );" \
+ : \
+ : "r"( r0 ) \
+ : "memory" )
+
+#define gte_strgb( r0 ) __asm__ volatile ( \
+ "swc2 $22, 0( %0 );" \
+ : \
+ : "r"( r0 ) \
+ : "memory" )
+
+#define gte_strgb3( r0, r1, r2 ) __asm__ volatile ( \
+ "swc2 $20, 0( %0 );" \
+ "swc2 $21, 0( %1 );" \
+ "swc2 $22, 0( %2 );" \
+ : \
+ : "r"( r0 ), "r"( r1 ), "r" ( r2 ) \
+ : "memory" )
+
+/**
+ * Store GTE data registers 9-11 as three consecutive halfwords at r0.
+ * $t0-$t2 are used as scratch and are declared as clobbers so the compiler
+ * does not keep the pointer operand (or any live value) in them.
+ */
+#define gte_stsv( r0 ) __asm__ volatile ( \
+ "mfc2 $t0, $9;" \
+ "mfc2 $t1, $10;" \
+ "mfc2 $t2, $11;" \
+ "sh $t0, 0( %0 );" \
+ "sh $t1, 2( %0 );" \
+ "sh $t2, 4( %0 );" \
+ : \
+ : "r"( r0 ) \
+ : "$t0", "$t1", "$t2", "memory" )
+
+#define gte_stlvnl( r0 ) __asm__ volatile ( \
+ "swc2 $25, 0( %0 );" \
+ "swc2 $26, 4( %0 );" \
+ "swc2 $27, 8( %0 );" \
+ : \
+ : "r"( r0 ) \
+ : "memory" )
+
+
+/**
+ * GTE operation macros
+ */
+
+#define gte_rtps() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x0180001;" )
+
+#define gte_rtpt() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x0280030;" )
+
+#define gte_nclip() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x1400006;" )
+
+#define gte_avsz3() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x158002D;" )
+
+#define gte_avsz4() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x168002E;" )
+
+#define gte_sqr0() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x0A00428;" )
+
+#define gte_sqr12() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x0A80428;" )
+
+#define gte_op0() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x170000C;" )
+
+#define gte_op12() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x178000C;" )
+
+#define gte_ncs() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x0C8041E;" )
+
+#define gte_nct() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x0D80420;" )
+
+/* NCCS command preceded by two nops. The definition ends at the closing
+ * parenthesis; it no longer continues onto the following line. */
+#define gte_nccs() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x108041B;" )
+
+#define gte_ncct() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x118043F;" )
+
+#define gte_ncds() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x0E80413;" )
+
+#define gte_ncdt() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x0F80416;" )
+
+#define gte_cc() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x138041C;" )
+
+#define gte_cdp() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x1280414;" )
+
+#define gte_dcpl() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x0680029;" )
+
+#define gte_dpcs() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x0780010;" )
+
+/* DPCT command (GTE function 0x2A) preceded by two nops. The previous
+ * command word, 0x0180001, was a copy of gte_rtps() and executed RTPS. */
+#define gte_dpct() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x0F8002A;" )
+
+#define gte_intpl() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x0980011;" )
+
+#define gte_gpf0() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x190003D;" )
+
+#define gte_gpf12() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x198003D;" )
+
+#define gte_gpl0() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x1A0003E;" )
+
+#define gte_gpl12() __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 0x1A8003E;" )
+
+#define gte_mvmva_core( r0 ) __asm__ volatile ( \
+ "nop;" \
+ "nop;" \
+ "cop2 %0" \
+ : \
+ : "g"( r0 ) )
+
+#define gte_mvmva(sf, mx, v, cv, lm) gte_mvmva_core( 0x0400012 | \
+ ((sf)<<19) | ((mx)<<17) | ((v)<<15) | ((cv)<<13) | ((lm)<<10) )
+
+
+/**
+ * GTE operation macros without leading nops
+ *
+ * Checking assembler output when using these is advised.
+ */
+
+#define gte_rtps_b() __asm__ volatile ( "cop2 0x0180001;" )
+#define gte_rtpt_b() __asm__ volatile ( "cop2 0x0280030;" )
+#define gte_nclip_b() __asm__ volatile ( "cop2 0x1400006;" )
+#define gte_avsz3_b() __asm__ volatile ( "cop2 0x158002D;" )
+#define gte_avsz4_b() __asm__ volatile ( "cop2 0x168002E;" )
+#define gte_sqr0_b() __asm__ volatile ( "cop2 0x0A00428;" )
+#define gte_sqr12_b() __asm__ volatile ( "cop2 0x0A80428;" )
+#define gte_op0_b() __asm__ volatile ( "cop2 0x170000C;" )
+#define gte_op12_b() __asm__ volatile ( "cop2 0x178000C;" )
+#define gte_ncs_b() __asm__ volatile ( "cop2 0x0C8041E;" )
+#define gte_nct_b() __asm__ volatile ( "cop2 0x0D80420;" )
+#define gte_nccs_b() __asm__ volatile ( "cop2 0x108041B;" )
+#define gte_ncct_b() __asm__ volatile ( "cop2 0x118043F;" )
+#define gte_ncds_b() __asm__ volatile ( "cop2 0x0E80413;" )
+#define gte_ncdt_b() __asm__ volatile ( "cop2 0x0F80416;" )
+#define gte_cc_b() __asm__ volatile ( "cop2 0x138041C;" )
+#define gte_cdp_b() __asm__ volatile ( "cop2 0x1280414;" )
+#define gte_dcpl_b() __asm__ volatile ( "cop2 0x0680029;" )
+#define gte_dpcs_b() __asm__ volatile ( "cop2 0x0780010;" )
+/* DPCT is GTE function 0x2A; 0x0180001 was a copy of gte_rtps_b(). */
+#define gte_dpct_b() __asm__ volatile ( "cop2 0x0F8002A;" )
+#define gte_intpl_b() __asm__ volatile ( "cop2 0x0980011;" )
+#define gte_gpf0_b() __asm__ volatile ( "cop2 0x190003D;" )
+#define gte_gpf12_b() __asm__ volatile ( "cop2 0x198003D;" )
+#define gte_gpl0_b() __asm__ volatile ( "cop2 0x1A0003E;" )
+#define gte_gpl12_b() __asm__ volatile ( "cop2 0x1A8003E;" )
+#define gte_mvmva_core_b( r0 ) __asm__ volatile ( \
+ "cop2 %0" \
+ : \
+ : "g"( r0 ) )
+#define gte_mvmva_b(sf, mx, v, cv, lm) gte_mvmva_core_b( 0x0400012 | \
+ ((sf)<<19) | ((mx)<<17) | ((v)<<15) | ((cv)<<13) | ((lm)<<10) )
+
+#endif // _MEIDOGTE_INLINE_C_H \ No newline at end of file
diff --git a/libmeidogte/mulmatrix.s b/libmeidogte/mulmatrix.s
new file mode 100644
index 0000000..19dabe8
--- /dev/null
+++ b/libmeidogte/mulmatrix.s
@@ -0,0 +1,74 @@
+.set noreorder
+
+.include "gtereg.h"
+.include "inline_s.h"
+
+.section .text
+
+
+# MulMatrix - multiply two matrices on the GTE, storing the result in m0.
+#
+# C prototype (meidogte.h):
+#   MATRIX *MulMatrix(MATRIX *m0, MATRIX *m1)
+#
+# a0 - Pointer to m0 (input, overwritten with the result)
+# a1 - Pointer to m1 (loaded into GTE control registers 0-4)
+# v0 - Returns a0
+#
+# m0 is processed one column at a time. Element m[row][col] is a halfword
+# at byte offset 2*(col+(3*row)). Each column is loaded before the slot it
+# occupies is overwritten, so updating m0 in place is safe.
+# NOTE(review): m1 sits in the GTE matrix registers and the columns of m0
+# are the vectors, so the product appears to be m1 x m0 - confirm this is
+# the intended operand order.
+.global MulMatrix
+.type MulMatrix, @function
+MulMatrix:
+
+ # Load m1 to GTE
+ lw $t0, 0($a1)
+ lw $t1, 4($a1)
+ ctc2 $t0, $0
+ ctc2 $t1, $1
+ lw $t0, 8($a1)
+ lw $t1, 12($a1)
+ lhu $t2, 16($a1)
+ ctc2 $t0, $2
+ ctc2 $t1, $3
+ ctc2 $t2, $4
+
+ # Pack column 0 of m0: m[0][0] in the low half, m[1][0] in the high half
+ lhu $t1, 2*(0+(3*1))($a0) # Load values for first
+ lhu $t0, 2*(0+(3*0))($a0) # R11 R21 R31
+ sll $t1, 16
+ or $t0, $t1
+ lhu $t1, 2*(0+(3*2))($a0)
+ mtc2 $t0, C2_VXY0
+ mtc2 $t1, C2_VZ0
+
+ # The two loads below stand in for the nops that the n-prefixed macros
+ # insert before a GTE command
+ lhu $t1, 2*(1+(3*1))($a0) # Load values for second
+ lhu $t0, 2*(1+(3*0))($a0) # R12 R22 R32
+ MVMVA(1, 0, 0, 3, 0) # First multiply
+ sll $t1, 16
+ or $t0, $t1
+ lhu $t1, 2*(1+(3*2))($a0)
+ mtc2 $t0, C2_VXY0
+ mtc2 $t1, C2_VZ0
+
+ mfc2 $t0, C2_IR1 # Store results of first
+ mfc2 $t1, C2_IR2
+ sh $t0, 2*(0+(3*0))($a0)
+ mfc2 $t0, C2_IR3
+ sh $t1, 2*(0+(3*1))($a0)
+ sh $t0, 2*(0+(3*2))($a0)
+
+ lhu $t1, 2*(2+(3*1))($a0) # Load values for third
+ lhu $t0, 2*(2+(3*0))($a0) # R13 R23 R33
+ MVMVA(1, 0, 0, 3, 0) # Second multiply
+ sll $t1, 16
+ or $t0, $t1
+ lhu $t1, 2*(2+(3*2))($a0)
+ mtc2 $t0, C2_VXY0
+ mtc2 $t1, C2_VZ0
+
+ mfc2 $t0, C2_IR1 # Store results of second
+ mfc2 $t1, C2_IR2
+ sh $t0, 2*(1+(3*0))($a0)
+ mfc2 $t0, C2_IR3
+ sh $t1, 2*(1+(3*1))($a0)
+ sh $t0, 2*(1+(3*2))($a0)
+ MVMVA(1, 0, 0, 3, 0) # Third multiply
+
+ mfc2 $t0, C2_IR1 # Store results of third
+ mfc2 $t1, C2_IR2
+ sh $t0, 2*(2+(3*0))($a0)
+ mfc2 $t0, C2_IR3
+ sh $t1, 2*(2+(3*1))($a0)
+ sh $t0, 2*(2+(3*2))($a0)
+
+ # Return m0; the move executes in the jr delay slot
+ jr $ra
+ move $v0, $a0
diff --git a/libmeidogte/mulmatrix0.s b/libmeidogte/mulmatrix0.s
new file mode 100644
index 0000000..874226b
--- /dev/null
+++ b/libmeidogte/mulmatrix0.s
@@ -0,0 +1,74 @@
+.set noreorder
+
+.include "gtereg.h"
+.include "inline_s.h"
+
+.section .text
+
+
+# MulMatrix0 - multiply two matrices on the GTE into a third matrix.
+#
+# C prototype (meidogte.h):
+#   MATRIX *MulMatrix0(MATRIX *m0, MATRIX *m1, MATRIX *m2)
+#
+# a0 - Pointer to m0 (loaded into GTE control registers 0-4)
+# a1 - Pointer to m1 (read one column at a time)
+# a2 - Pointer to m2 (output; only the 3x3 part is written)
+# v0 - Returns a2
+#
+# Element m[row][col] is a halfword at byte offset 2*(col+(3*row)).
+.global MulMatrix0
+.type MulMatrix0, @function
+MulMatrix0:
+
+ # Load m0 to GTE
+ lw $t0, 0($a0)
+ lw $t1, 4($a0)
+ ctc2 $t0, $0
+ ctc2 $t1, $1
+ lw $t0, 8($a0)
+ lw $t1, 12($a0)
+ lhu $t2, 16($a0)
+ ctc2 $t0, $2
+ ctc2 $t1, $3
+ ctc2 $t2, $4
+
+ # Pack column 0 of m1: m[0][0] in the low half, m[1][0] in the high half
+ lhu $t1, 2*(0+(3*1))($a1) # Load values for first
+ lhu $t0, 2*(0+(3*0))($a1) # R11 R21 R31
+ sll $t1, 16
+ or $t0, $t1
+ lhu $t1, 2*(0+(3*2))($a1)
+ mtc2 $t0, C2_VXY0
+ mtc2 $t1, C2_VZ0
+
+ # The two loads below stand in for the nops that the n-prefixed macros
+ # insert before a GTE command
+ lhu $t1, 2*(1+(3*1))($a1) # Load values for second
+ lhu $t0, 2*(1+(3*0))($a1) # R12 R22 R32
+ MVMVA(1, 0, 0, 3, 0) # First multiply
+ sll $t1, 16
+ or $t0, $t1
+ lhu $t1, 2*(1+(3*2))($a1)
+ mtc2 $t0, C2_VXY0
+ mtc2 $t1, C2_VZ0
+
+ mfc2 $t0, C2_IR1 # Store results of first
+ mfc2 $t1, C2_IR2
+ sh $t0, 2*(0+(3*0))($a2)
+ mfc2 $t0, C2_IR3
+ sh $t1, 2*(0+(3*1))($a2)
+ sh $t0, 2*(0+(3*2))($a2)
+
+ lhu $t1, 2*(2+(3*1))($a1) # Load values for third
+ lhu $t0, 2*(2+(3*0))($a1) # R13 R23 R33
+ MVMVA(1, 0, 0, 3, 0) # Second multiply
+ sll $t1, 16
+ or $t0, $t1
+ lhu $t1, 2*(2+(3*2))($a1)
+ mtc2 $t0, C2_VXY0
+ mtc2 $t1, C2_VZ0
+
+ mfc2 $t0, C2_IR1 # Store results of second
+ mfc2 $t1, C2_IR2
+ sh $t0, 2*(1+(3*0))($a2)
+ mfc2 $t0, C2_IR3
+ sh $t1, 2*(1+(3*1))($a2)
+ sh $t0, 2*(1+(3*2))($a2)
+ MVMVA(1, 0, 0, 3, 0) # Third multiply
+
+ mfc2 $t0, C2_IR1 # Store results of third
+ mfc2 $t1, C2_IR2
+ sh $t0, 2*(2+(3*0))($a2)
+ mfc2 $t0, C2_IR3
+ sh $t1, 2*(2+(3*1))($a2)
+ sh $t0, 2*(2+(3*2))($a2)
+
+ # Return m2; the move executes in the jr delay slot
+ jr $ra
+ move $v0, $a2
diff --git a/libmeidogte/pushpopmatrix.s b/libmeidogte/pushpopmatrix.s
new file mode 100644
index 0000000..d10687a
--- /dev/null
+++ b/libmeidogte/pushpopmatrix.s
@@ -0,0 +1,68 @@
+.set noreorder
+
+.include "gtereg.h"
+.include "inline_s.h"
+
+.section .text
+
+
+# PushMatrix - save the GTE rotation matrix and translation registers.
+#
+# Copies control registers R11R12..R33 and TRX..TRZ into the eight words at
+# _matrix_stack. There is one save slot only: a second call overwrites the
+# values stored by the first. Uses a0, v0 and v1.
+.global PushMatrix
+.type PushMatrix, @function
+PushMatrix:
+ la $a0, _matrix_stack
+ cfc2 $v0, C2_R11R12
+ cfc2 $v1, C2_R13R21
+ sw $v0, 0($a0)
+ cfc2 $v0, C2_R22R23
+ sw $v1, 4($a0)
+ sw $v0, 8($a0)
+ cfc2 $v0, C2_R31R32
+ cfc2 $v1, C2_R33
+ sw $v0, 12($a0)
+ sw $v1, 16($a0)
+ cfc2 $v0, C2_TRX
+ cfc2 $v1, C2_TRY
+ sw $v0, 20($a0)
+ cfc2 $v0, C2_TRZ
+ sw $v1, 24($a0)
+ # The final store executes in the jr delay slot
+ jr $ra
+ sw $v0, 28($a0)
+
+# PopMatrix - restore the GTE rotation matrix and translation registers.
+#
+# Reloads control registers R11R12..R33 and TRX..TRZ from the eight words at
+# _matrix_stack, i.e. the values written by the last PushMatrix call. The
+# slot itself is left unchanged. Uses a0, v0 and v1.
+.global PopMatrix
+.type PopMatrix, @function
+PopMatrix:
+ la $a0, _matrix_stack
+ lw $v0, 0($a0)
+ lw $v1, 4($a0)
+ ctc2 $v0, C2_R11R12
+ ctc2 $v1, C2_R13R21
+ lw $v0, 8($a0)
+ lw $v1, 12($a0)
+ ctc2 $v0, C2_R22R23
+ lw $v0, 16($a0)
+ ctc2 $v1, C2_R31R32
+ ctc2 $v0, C2_R33
+ lw $v0, 20($a0)
+ lw $v1, 24($a0)
+ ctc2 $v0, C2_TRX
+ lw $v0, 28($a0)
+ ctc2 $v1, C2_TRY
+ ctc2 $v0, C2_TRZ
+ jr $ra
+ nop
+
+
+.section .data
+
+
+# Single save slot used by PushMatrix/PopMatrix: eight words holding the
+# five rotation matrix registers followed by TRX, TRY and TRZ.
+# The .type directive now names the label that is actually defined below.
+.type _matrix_stack, @object
+_matrix_stack:
+ .word 0
+ .word 0
+ .word 0
+ .word 0
+ .word 0
+ .word 0
+ .word 0
+ .word 0
+
diff --git a/libmeidogte/scalematrix.s b/libmeidogte/scalematrix.s
new file mode 100644
index 0000000..3e83800
--- /dev/null
+++ b/libmeidogte/scalematrix.s
@@ -0,0 +1,68 @@
+.set noreorder
+
+.include "gtereg.h"
+.include "inline_s.h"
+
+.section .text
+
+
+.global ScaleMatrix
+.type ScaleMatrix, @function
+ScaleMatrix:                      # Scale a 3x3 halfword matrix in place; a0 = matrix pointer, a1 = pointer to three word scale factors; returns a0 in v0
+
+ lwc2 C2_IR0, 0($a1) # X          (word 0 of the scale vector becomes the GTE multiplier IR0)
+
+ lh $v0, 2*(0+(3*0))($a0)         # Load matrix halfwords at indices 0, 3 and 6 (sign-extended)
+ lh $v1, 2*(0+(3*1))($a0)
+ mtc2 $v0, C2_IR1
+ lh $v0, 2*(0+(3*2))($a0)
+ mtc2 $v1, C2_IR2
+ mtc2 $v0, C2_IR3
+
+ nGPF(1)                          # Macro from inline_s.h; presumably IR1..3 = (IR1..3 * IR0) >> 12 -- confirm against the macro
+
+ mfc2 $v0, C2_IR1                 # Write the scaled values back over indices 0, 3 and 6
+ mfc2 $v1, C2_IR2
+ sh $v0, 2*(0+(3*0))($a0)
+ mfc2 $v0, C2_IR3
+ sh $v1, 2*(0+(3*1))($a0)
+ sh $v0, 2*(0+(3*2))($a0)
+
+ lwc2 C2_IR0, 4($a1) # Y          (word 1 of the scale vector)
+
+ lh $v0, 2*(1+(3*0))($a0)         # Load matrix halfwords at indices 1, 4 and 7
+ lh $v1, 2*(1+(3*1))($a0)
+ mtc2 $v0, C2_IR1
+ lh $v0, 2*(1+(3*2))($a0)
+ mtc2 $v1, C2_IR2
+ mtc2 $v0, C2_IR3
+
+ nGPF(1)                          # Same GTE operation as for X
+
+ mfc2 $v0, C2_IR1                 # Write the scaled values back over indices 1, 4 and 7
+ mfc2 $v1, C2_IR2
+ sh $v0, 2*(1+(3*0))($a0)
+ mfc2 $v0, C2_IR3
+ sh $v1, 2*(1+(3*1))($a0)
+ sh $v0, 2*(1+(3*2))($a0)
+
+ lwc2 C2_IR0, 8($a1) # Z          (word 2 of the scale vector)
+
+ lh $v0, 2*(2+(3*0))($a0)         # Load matrix halfwords at indices 2, 5 and 8
+ lh $v1, 2*(2+(3*1))($a0)
+ mtc2 $v0, C2_IR1
+ lh $v0, 2*(2+(3*2))($a0)
+ mtc2 $v1, C2_IR2
+ mtc2 $v0, C2_IR3
+
+ nGPF(1)                          # Same GTE operation as for X
+
+ mfc2 $v0, C2_IR1                 # Write the scaled values back over indices 2, 5 and 8
+ mfc2 $v1, C2_IR2
+ sh $v0, 2*(2+(3*0))($a0)
+ mfc2 $v0, C2_IR3
+ sh $v1, 2*(2+(3*1))($a0)
+ sh $v0, 2*(2+(3*2))($a0)
+
+ jr $ra
+ move $v0, $a0                    # jr delay slot: return the matrix pointer
diff --git a/libmeidogte/square0.s b/libmeidogte/square0.s
new file mode 100644
index 0000000..d037b7e
--- /dev/null
+++ b/libmeidogte/square0.s
@@ -0,0 +1,27 @@
+.set noreorder
+
+.include "gtereg.h"
+.include "inline_s.h"
+
+.section .text
+
+
+.global Square0
+.type Square0, @function
+Square0:                          # Run the GTE nSQR(0) operation on three words from a0 and store the three results to a1
+
+ # a0 - Pointer to input vector (v0)
+ # a1 - Pointer to output vector (v1)
+
+ lwc2 C2_IR1, 0($a0)              # Load the three input words straight into IR1..IR3
+ lwc2 C2_IR2, 4($a0)
+ lwc2 C2_IR3, 8($a0)
+
+ nSQR(0)                          # Macro from inline_s.h; presumably squares IR1..3 with no fraction shift -- confirm against the macro
+
+ swc2 C2_IR1, 0($a1)              # Store IR1..IR3 as three words
+ swc2 C2_IR2, 4($a1)
+ swc2 C2_IR3, 8($a1)
+
+ jr $ra
+ nop                              # jr delay slot; v0 is not written by this routine
diff --git a/libmeidogte/squareroot.s b/libmeidogte/squareroot.s
new file mode 100644
index 0000000..af095a2
--- /dev/null
+++ b/libmeidogte/squareroot.s
@@ -0,0 +1,121 @@
+.set noreorder
+
+.include "gtereg.h"
+.include "inline_s.h"
+
+.section .text
+
+.global SquareRoot12
+.type SquareRoot12, @function
+SquareRoot12:                     # Table-driven square root of a0, result in v0; same as SquareRoot0 but with a shift base of 19 (presumably 20.12 fixed point -- confirm)
+ mtc2 $a0, C2_LZCS                # Feed the input to the GTE LZCS register
+ nop
+ nop                              # Two nops of padding before the result register is read
+ mfc2 $v0, C2_LZCR                # v0 = count reported by LZCR (presumably leading-zero count of a0)
+ beq $v0, 32, $bad_sqr12          # Count of 32: no usable bits, return 0
+ nop                              # Branch delay slot
+ andi $t0, $v0, 0x1               # t0 = low bit of the count (not read again in this routine)
+ addiu $v1, $0 , -2               # v1 = ~1 mask
+ and $t2, $v0, $v1                # t2 = count rounded down to an even number
+ li $t1, 19
+ sub $t1, $t2
+ sra $t1, 1                       # t1 = (19 - t2) / 2: final shift applied to the table value (may be negative)
+ addi $t3, $t2, -24               # t3 = t2 - 24: shift needed to normalise the input to 8 bits
+ bltz $t3, $value_less12
+ nop                              # Branch delay slot
+ sllv $t4, $a0, $t3               # t2 >= 24: shift left by t3
+ b $value_greater12               # NOTE: under .set noreorder the addiu below runs in this delay slot; t3 is not read afterwards
+$value_less12:
+ addiu $t3, $0 , 24
+ sub $t3, $t2                     # t3 = 24 - t2
+ srav $t4, $a0, $t3               # t2 < 24: shift right (arithmetic) by 24 - t2
+$value_greater12:
+ addi $t4, -64                    # Table starts at mantissa 64 (for positive input t4 is in 64..255 here)
+ sll $t4, 1                       # Convert index to a byte offset into the halfword table
+ la $t5, sqrt_table
+ addu $t5, $t4
+ lh $t5, 0($t5)                   # t5 = sqrt_table entry (signed halfword load)
+ nop                              # Keep the load result from being used in the very next instruction
+
+ bltz $t1, $1594c                 # Negative shift amount: go shift right instead
+ nop                              # Branch delay slot
+ jr $ra
+ sllv $v0, $t5, $t1               # jr delay slot: result = table value << t1
+
+$1594c:
+
+ sub $t1, $0 , $t1                # t1 = -t1
+ jr $ra
+ srl $v0, $t5, $t1                # jr delay slot: result = table value >> t1 (register-amount shift)
+
+$bad_sqr12:
+ jr $ra
+ move $v0, $0                     # jr delay slot: return 0
+
+
+.global SquareRoot0
+.type SquareRoot0, @function
+SquareRoot0:                      # Table-driven square root of a0, result in v0; shift base 31 and a final >> 12 (presumably plain integer result -- confirm)
+ mtc2 $a0, C2_LZCS                # Feed the input to the GTE LZCS register
+ nop
+ nop                              # Two nops of padding before the result register is read
+ mfc2 $v0, C2_LZCR                # v0 = count reported by LZCR (presumably leading-zero count of a0)
+ beq $v0, 32, $bad_sqr            # Count of 32: no usable bits, return 0
+ nop                              # Branch delay slot
+ andi $t0, $v0, 0x1               # t0 = low bit of the count (not read again in this routine)
+ addiu $v1, $0 , -2               # v1 = ~1 mask
+ and $t2, $v0, $v1                # t2 = count rounded down to an even number
+ li $t1, 31
+ sub $t1, $t2
+ sra $t1, 1                       # t1 = (31 - t2) / 2: left shift applied to the table value
+ addi $t3, $t2, -24               # t3 = t2 - 24: shift needed to normalise the input to 8 bits
+ bltz $t3, $value_less
+ nop                              # Branch delay slot
+ sllv $t4, $a0, $t3               # t2 >= 24: shift left by t3
+ b $value_greater                 # NOTE: under .set noreorder the addiu below runs in this delay slot; t3 is not read afterwards
+$value_less:
+ addiu $t3, $0 , 24
+ sub $t3, $t2                     # t3 = 24 - t2
+ srav $t4, $a0, $t3               # t2 < 24: shift right (arithmetic) by 24 - t2
+$value_greater:
+ addi $t4, -64                    # Table starts at mantissa 64 (for positive input t4 is in 64..255 here)
+ sll $t4, 1                       # Convert index to a byte offset into the halfword table
+ la $t5, sqrt_table
+ addu $t5, $t4
+ lh $t5, 0($t5)                   # t5 = sqrt_table entry (signed halfword load)
+ nop                              # Keep the load result from being used in the very next instruction
+ sllv $t5, $t5, $t1               # Scale the table value up by t1
+ jr $ra
+ srl $v0, $t5, 12                 # jr delay slot: drop the 12 fraction bits of the table value
+$bad_sqr:
+ jr $ra
+ move $v0, $0                     # jr delay slot: return 0
+
+
+.section .data
+
+sqrt_table:                       # 192 halfwords: entry i = sqrt((64 + i) / 64) * 4096, indexed by (8-bit mantissa - 64); read with signed lh, so every entry must stay below 0x8000
+ .hword 0x1000,0x101f,0x103f,0x105e,0x107e,0x109c,0x10bb,0x10da
+ .hword 0x10f8,0x1116,0x1134,0x1152,0x116f,0x118c,0x11a9,0x11c6
+ .hword 0x11e3,0x1200,0x121c,0x1238,0x1254,0x1270,0x128c,0x12a7
+ .hword 0x12c2,0x12de,0x12f9,0x1314,0x132e,0x1349,0x1364,0x137e
+ .hword 0x1398,0x13b2,0x13cc,0x13e6,0x1400,0x1419,0x1432,0x144c
+ .hword 0x1465,0x147e,0x1497,0x14b0,0x14c8,0x14e1,0x14f9,0x1512
+ .hword 0x152a,0x1542,0x155a,0x1572,0x158a,0x15a2,0x15b9,0x15d1
+ .hword 0x15e8,0x1600,0x1617,0x162e,0x1645,0x165c,0x1673,0x1689
+ .hword 0x16a0,0x16b7,0x16cd,0x16e4,0x16fa,0x1710,0x1726,0x173c
+ .hword 0x1752,0x1768,0x177e,0x1794,0x17aa,0x17bf,0x17d5,0x17ea
+ .hword 0x1800,0x1815,0x182a,0x183f,0x1854,0x1869,0x187e,0x1893
+ .hword 0x18a8,0x18bd,0x18d1,0x18e6,0x18fa,0x190f,0x1923,0x1938
+ .hword 0x194c,0x1960,0x1974,0x1988,0x199c,0x19b0,0x19c4,0x19d8
+ .hword 0x19ec,0x1a00,0x1a13,0x1a27,0x1a3a,0x1a4e,0x1a61,0x1a75
+ .hword 0x1a88,0x1a9b,0x1aae,0x1ac2,0x1ad5,0x1ae8,0x1afb,0x1b0e # entry 116 was 0xa1d5 (transposed digits); sqrt(180/64)*4096 = 0x1ad5
+ .hword 0x1b21,0x1b33,0x1b46,0x1b59,0x1b6c,0x1b7e,0x1b91,0x1ba3
+ .hword 0x1bb6,0x1bc8,0x1bdb,0x1bed,0x1c00,0x1c12,0x1c24,0x1c36
+ .hword 0x1c48,0x1c5a,0x1c6c,0x1c7e,0x1c90,0x1ca2,0x1cb4,0x1cc6
+ .hword 0x1cd8,0x1ce9,0x1cfb,0x1d0d,0x1d1e,0x1d30,0x1d41,0x1d53
+ .hword 0x1d64,0x1d76,0x1d87,0x1d98,0x1daa,0x1dbb,0x1dcc,0x1ddd
+ .hword 0x1dee,0x1e00,0x1e11,0x1e22,0x1e33,0x1e43,0x1e54,0x1e65
+ .hword 0x1e76,0x1e87,0x1e98,0x1ea8,0x1eb9,0x1eca,0x1eda,0x1eeb
+ .hword 0x1efb,0x1f0c,0x1f1c,0x1f2d,0x1f3d,0x1f4e,0x1f5e,0x1f6e
+ .hword 0x1f7e,0x1f8f,0x1f9f,0x1faf,0x1fbf,0x1fcf,0x1fdf,0x1fef
diff --git a/libmeidogte/vectornormals.s b/libmeidogte/vectornormals.s
new file mode 100644
index 0000000..0dbe1e8
--- /dev/null
+++ b/libmeidogte/vectornormals.s
@@ -0,0 +1,107 @@
+.set noreorder
+.set noat
+
+.include "gtereg.h"
+.include "inline_s.h"
+
+.section .text
+
+
+.global VectorNormalS
+.type VectorNormalS, @function
+VectorNormalS:                    # Normalise the 3-word vector at a0 into three halfwords at a1; v0 is left holding the sum of the nSQR(0) results
+ lw $t0, 0($a0)                   # t0..t2 = input components, kept for the second GTE pass
+ lw $t1, 4($a0)
+ lw $t2, 8($a0)
+
+ mtc2 $t0, C2_IR1
+ mtc2 $t1, C2_IR2
+ mtc2 $t2, C2_IR3
+
+ nSQR(0)                          # Macro from inline_s.h; presumably squares IR1..3 into MAC1..3 -- confirm against the macro
+
+ mfc2 $t3, C2_MAC1
+ mfc2 $t4, C2_MAC2
+ mfc2 $t5, C2_MAC3
+
+ add $t3, $t4                     # NOTE(review): add (not addu) traps on signed overflow for very large inputs
+ add $v0, $t3, $t5                # v0 = MAC1 + MAC2 + MAC3
+ mtc2 $v0, C2_LZCS                # Feed the sum to the GTE LZCS register
+ nop
+ nop                              # Two nops of padding before the result register is read
+ mfc2 $v1, C2_LZCR                # v1 = count reported by LZCR (presumably leading-zero count of the sum)
+
+ addiu $at, $0 , -2               # $at used explicitly as a scratch ~1 mask (file sets .set noat)
+ and $v1, $at                     # v1 = count rounded down to an even number
+
+ addiu $t6, $0 , 0x1f
+ sub $t6, $v1
+ sra $t6, 1                       # t6 = (31 - v1) / 2: final right shift applied to the outputs
+ addiu $t3, $v1, -24              # t3 = v1 - 24: shift needed to normalise the sum to 8 bits
+
+ bltz $t3, $value_neg
+ nop                              # Branch delay slot
+ b $value_pos
+ sllv $t4, $v0, $t3               # Branch delay slot: v1 >= 24, shift left by t3
+$value_neg:
+ addiu $t3, $0 , 24
+ sub $t3, $v1                     # t3 = 24 - v1
+ srav $t4, $v0, $t3               # v1 < 24: shift right (arithmetic) by 24 - v1
+$value_pos:
+ addi $t4, -64                    # Table starts at mantissa 64; NOTE(review): no guard for a zero sum, which gives a negative index
+ sll $t4, 1                       # Convert index to a byte offset into the halfword table
+
+ la $t5, _norm_table
+ addu $t5, $t4
+ lh $t5, 0($t5)                   # t5 = _norm_table entry (reciprocal square-root factor)
+ nop                              # Keep the load result from being used in the very next instruction
+
+ mtc2 $t5, C2_IR0                 # Multiplier for the second pass
+ mtc2 $t0, C2_IR1                 # Reload the original components
+ mtc2 $t1, C2_IR2
+ mtc2 $t2, C2_IR3
+
+ nGPF(0)                          # Macro from inline_s.h; presumably MAC1..3 = IR1..3 * IR0 with no shift -- confirm against the macro
+
+ mfc2 $t0, C2_MAC1
+ mfc2 $t1, C2_MAC2
+ mfc2 $t2, C2_MAC3
+
+ sra $t0, $t6                     # Two-operand form: each result is shifted right (arithmetic) by t6
+ sra $t1, $t6
+ sra $t2, $t6
+
+ sh $t0, 0($a1)                   # Output is three consecutive halfwords
+ sh $t1, 2($a1)
+ jr $ra
+ sh $t2, 4($a1)                   # jr delay slot: final store
+
+
+.section .data
+
+_norm_table:                      # 192 halfwords indexed by (8-bit mantissa - 64) from VectorNormalS; values fall from 0x1000 to 0x0804, consistent with 4096 / sqrt((64 + i) / 64)
+ .hword 0x1000, 0x0FE0, 0x0FC1, 0x0FA3, 0x0F85, 0x0F68, 0x0F4C, 0x0F30
+ .hword 0x0F15, 0x0EFB, 0x0EE1, 0x0EC7, 0x0EAE, 0x0E96, 0x0E7E, 0x0E66
+ .hword 0x0E4F, 0x0E38, 0x0E22, 0x0E0C, 0x0DF7, 0x0DE2, 0x0DCD, 0x0DB9
+ .hword 0x0DA5, 0x0D91, 0x0D7E, 0x0D6B, 0x0D58, 0x0D45, 0x0D33, 0x0D21
+ .hword 0x0D10, 0x0CFF, 0x0CEE, 0x0CDD, 0x0CCC, 0x0CBC, 0x0CAC, 0x0C9C
+ .hword 0x0C8D, 0x0C7D, 0x0C6E, 0x0C5F, 0x0C51, 0x0C42, 0x0C34, 0x0C26
+ .hword 0x0C18, 0x0C0A, 0x0BFD, 0x0BEF, 0x0BE2, 0x0BD5, 0x0BC8, 0x0BBB
+ .hword 0x0BAF, 0x0BA2, 0x0B96, 0x0B8A, 0x0B7E, 0x0B72, 0x0B67, 0x0B5B
+ .hword 0x0B50, 0x0B45, 0x0B39, 0x0B2E, 0x0B24, 0x0B19, 0x0B0E, 0x0B04
+ .hword 0x0AF9, 0x0AEF, 0x0AE5, 0x0ADB, 0x0AD1, 0x0AC7, 0x0ABD, 0x0AB4
+ .hword 0x0AAA, 0x0AA1, 0x0A97, 0x0A8E, 0x0A85, 0x0A7C, 0x0A73, 0x0A6A
+ .hword 0x0A61, 0x0A59, 0x0A50, 0x0A47, 0x0A3F, 0x0A37, 0x0A2E, 0x0A26
+ .hword 0x0A1E, 0x0A16, 0x0A0E, 0x0A06, 0x09FE, 0x09F6, 0x09EF, 0x09E7
+ .hword 0x09E0, 0x09D8, 0x09D1, 0x09C9, 0x09C2, 0x09BB, 0x09B4, 0x09AD
+ .hword 0x09A5, 0x099E, 0x0998, 0x0991, 0x098A, 0x0983, 0x097C, 0x0976
+ .hword 0x096F, 0x0969, 0x0962, 0x095C, 0x0955, 0x094F, 0x0949, 0x0943
+ .hword 0x093C, 0x0936, 0x0930, 0x092A, 0x0924, 0x091E, 0x0918, 0x0912
+ .hword 0x090D, 0x0907, 0x0901, 0x08FB, 0x08F6, 0x08F0, 0x08EB, 0x08E5
+ .hword 0x08E0, 0x08DA, 0x08D5, 0x08CF, 0x08CA, 0x08C5, 0x08BF, 0x08BA
+ .hword 0x08B5, 0x08B0, 0x08AB, 0x08A6, 0x08A1, 0x089C, 0x0897, 0x0892
+ .hword 0x088D, 0x0888, 0x0883, 0x087E, 0x087A, 0x0875, 0x0870, 0x086B
+ .hword 0x0867, 0x0862, 0x085E, 0x0859, 0x0855, 0x0850, 0x084C, 0x0847
+ .hword 0x0843, 0x083E, 0x083A, 0x0836, 0x0831, 0x082D, 0x0829, 0x0824
+ .hword 0x0820, 0x081C, 0x0818, 0x0814, 0x0810, 0x080C, 0x0808, 0x0804
+