diff options
Diffstat (limited to 'libmeidogte')
| -rw-r--r-- | libmeidogte/Makefile | 26 | ||||
| -rw-r--r-- | libmeidogte/applymatrixlv.s | 40 | ||||
| -rw-r--r-- | libmeidogte/compmatrixlv.s | 100 | ||||
| -rw-r--r-- | libmeidogte/gtereg.h | 80 | ||||
| -rw-r--r-- | libmeidogte/hirotmatrix.c | 35 | ||||
| -rw-r--r-- | libmeidogte/hisin.c | 33 | ||||
| -rw-r--r-- | libmeidogte/initgeom.s | 45 | ||||
| -rw-r--r-- | libmeidogte/inline_s.h | 227 | ||||
| -rw-r--r-- | libmeidogte/isin.c | 34 | ||||
| -rw-r--r-- | libmeidogte/matrix.c | 45 | ||||
| -rw-r--r-- | libmeidogte/meidogte.h | 170 | ||||
| -rw-r--r-- | libmeidogte/meidogte_inline.h | 433 | ||||
| -rw-r--r-- | libmeidogte/mulmatrix.s | 74 | ||||
| -rw-r--r-- | libmeidogte/mulmatrix0.s | 74 | ||||
| -rw-r--r-- | libmeidogte/pushpopmatrix.s | 68 | ||||
| -rw-r--r-- | libmeidogte/scalematrix.s | 68 | ||||
| -rw-r--r-- | libmeidogte/square0.s | 27 | ||||
| -rw-r--r-- | libmeidogte/squareroot.s | 121 | ||||
| -rw-r--r-- | libmeidogte/vectornormals.s | 107 |
19 files changed, 1807 insertions, 0 deletions
diff --git a/libmeidogte/Makefile b/libmeidogte/Makefile new file mode 100644 index 0000000..938f3c2 --- /dev/null +++ b/libmeidogte/Makefile @@ -0,0 +1,26 @@ +include ../Makefile.cfg + +CFILES = $(notdir $(wildcard ./*.c)) +AFILES = $(notdir $(wildcard ./*.s)) +OFILES = $(CFILES:.c=.o) $(AFILES:.s=.o) + +TARGET = libmeidogte.a +.PHONY: all clean install # command names, not files +all: $(TARGET) + +$(TARGET): $(OFILES) + $(AR) cr $(TARGET) $(OFILES) + $(RANLIB) $(TARGET) + +%.o: %.c + $(CC) $(CFLAGS) -I./ -c $< -o $@ + +%.o: %.s + $(AS) $(AFLAGS) -I ./ $< -o $@ + +clean: + rm -Rf $(TARGET) $(OFILES) + +install: all + cp $(TARGET) $(TOOLCHAIN_PREFIX)/lib + cp meidogte.h meidogte_inline.h $(TOOLCHAIN_PREFIX)/include diff --git a/libmeidogte/applymatrixlv.s b/libmeidogte/applymatrixlv.s new file mode 100644 index 0000000..332a2f8 --- /dev/null +++ b/libmeidogte/applymatrixlv.s @@ -0,0 +1,40 @@ +.set noreorder + +.include "gtereg.h" +.include "inline_s.h" + +.section .text + + +.global ApplyMatrixLV +.type ApplyMatrixLV, @function +ApplyMatrixLV: + + # Load matrix (a0) to GTE control registers $0-$4 (C2_R11R12..C2_R33 in gtereg.h) + lw $t0, 0($a0) + lw $t1, 4($a0) + ctc2 $t0, $0 + ctc2 $t1, $1 + lw $t0, 8($a0) + lw $t1, 12($a0) + lhu $t2, 16($a0) # m[2][2] is the lone ninth halfword + ctc2 $t0, $2 + ctc2 $t1, $3 + ctc2 $t2, $4 + + lw $t0, 0($a1) # Load input vector (a1) into IR1-IR3 + lw $t1, 4($a1) + mtc2 $t0, C2_IR1 + lw $t0, 8($a1) + mtc2 $t1, C2_IR2 + mtc2 $t0, C2_IR3 + + nMVMVA(1, 0, 3, 3, 0) # args: sf, mx, v, cv, lm (see inline_s.h) + + swc2 C2_IR1, 0($a2) # Store IR1-IR3 to the output vector (a2) + swc2 C2_IR2, 4($a2) + swc2 C2_IR3, 8($a2) + + jr $ra + move $v0, $a2 # jr delay slot: return a2 + 
\ No newline at end of file diff --git a/libmeidogte/compmatrixlv.s b/libmeidogte/compmatrixlv.s new file mode 100644 index 0000000..f613385 --- /dev/null +++ b/libmeidogte/compmatrixlv.s @@ -0,0 +1,100 @@ +.set noreorder + +.include "gtereg.h" +.include "inline_s.h" + +.set MATRIX_r11r12, 0 # byte offsets into MATRIX (short m[3][3]; int t[3]) +.set MATRIX_r13r21, 4 +.set MATRIX_r22r23, 8 +.set MATRIX_r31r32, 12 +.set MATRIX_r33, 16 +.set MATRIX_trx, 20 +.set MATRIX_try, 24 +.set MATRIX_trz, 28 + + +.global CompMatrixLV +.type CompMatrixLV, @function +CompMatrixLV: + + # Load matrix v0 (a0) to GTE: rotation and translation + lw $t0, MATRIX_r11r12($a0) + lw $t1, MATRIX_r13r21($a0) + ctc2 $t0, C2_R11R12 + ctc2 $t1, C2_R13R21 + lw $t0, MATRIX_r22r23($a0) + lw $t1, MATRIX_r31r32($a0) + lhu $t2, MATRIX_r33($a0) + ctc2 $t0, C2_R22R23 + lw $t0, MATRIX_trx($a0) + ctc2 $t1, C2_R31R32 + lw $t1, MATRIX_try($a0) + ctc2 $t2, C2_R33 + lw $t2, MATRIX_trz($a0) + ctc2 $t0, C2_TRX + ctc2 $t1, C2_TRY + ctc2 $t2, C2_TRZ + + lw $t0, MATRIX_trx($a1) # Translation of v1 (a1) goes through IR1-IR3 + lw $t1, MATRIX_try($a1) + mtc2 $t0, C2_IR1 + lw $t0, MATRIX_trz($a1) + mtc2 $t1, C2_IR2 + mtc2 $t0, C2_IR3 + + nMVMVA(1, 0, 3, 0, 0) # args: sf, mx, v, cv, lm (see inline_s.h) + + swc2 C2_IR1, MATRIX_trx($a2) # Result becomes the translation of v2 (a2) + swc2 C2_IR2, MATRIX_try($a2) + swc2 C2_IR3, MATRIX_trz($a2) + + lhu $t1, 2*(0+(3*1))($a1) # Load values for first + lhu $t0, 2*(0+(3*0))($a1) # R11 R21 R31 + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(0+(3*2))($a1) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + lhu $t1, 2*(1+(3*1))($a1) # Load values for second + lhu $t0, 2*(1+(3*0))($a1) # R12 R22 R32 + MVMVA(1, 0, 0, 3, 0) # First multiply + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(1+(3*2))($a1) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + mfc2 $t0, C2_IR1 # Store results of first + mfc2 $t1, C2_IR2 + sh $t0, 2*(0+(3*0))($a2) + mfc2 $t0, C2_IR3 + sh $t1, 2*(0+(3*1))($a2) + sh $t0, 2*(0+(3*2))($a2) + + lhu $t1, 2*(2+(3*1))($a1) # Load values for third + lhu $t0, 2*(2+(3*0))($a1) # R13 R23 R33 + MVMVA(1, 0, 0, 3, 0) # Second multiply + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(2+(3*2))($a1) + mtc2 $t0, C2_VXY0 + mtc2 
$t1, C2_VZ0 + + mfc2 $t0, C2_IR1 # Store results of second + mfc2 $t1, C2_IR2 + sh $t0, 2*(1+(3*0))($a2) + mfc2 $t0, C2_IR3 + sh $t1, 2*(1+(3*1))($a2) + sh $t0, 2*(1+(3*2))($a2) + MVMVA(1, 0, 0, 3, 0) # Third multiply + + mfc2 $t0, C2_IR1 # Store results of third + mfc2 $t1, C2_IR2 + sh $t0, 2*(2+(3*0))($a2) + mfc2 $t0, C2_IR3 + sh $t1, 2*(2+(3*1))($a2) + sh $t0, 2*(2+(3*2))($a2) + + jr $ra + move $v0, $a2 # jr delay slot: return a2 diff --git a/libmeidogte/gtereg.h b/libmeidogte/gtereg.h new file mode 100644 index 0000000..5d3391b --- /dev/null +++ b/libmeidogte/gtereg.h @@ -0,0 +1,80 @@ +# GTE register definitions for GNU assembler (as). +# +# 2019 Meido-Tek Productions + +# +# GTE data registers (use mfc2, mtc2, lwc2, swc2) +# +.set C2_VXY0, $0 +.set C2_VZ0, $1 +.set C2_VXY1, $2 +.set C2_VZ1, $3 +.set C2_VXY2, $4 +.set C2_VZ2, $5 +.set C2_RGB, $6 +.set C2_OTZ, $7 + +.set C2_IR0, $8 +.set C2_IR1, $9 +.set C2_IR2, $10 +.set C2_IR3, $11 +.set C2_SXY0, $12 +.set C2_SXY1, $13 +.set C2_SXY2, $14 +.set C2_SXYP, $15 + +.set C2_SZ0, $16 +.set C2_SZ1, $17 +.set C2_SZ2, $18 +.set C2_SZ3, $19 +.set C2_RGB0, $20 +.set C2_RGB1, $21 +.set C2_RGB2, $22 + +.set C2_MAC0, $24 +.set C2_MAC1, $25 +.set C2_MAC2, $26 +.set C2_MAC3, $27 +.set C2_IRGB, $28 +.set C2_ORGB, $29 +.set C2_LZCS, $30 +.set C2_LZCR, $31 + +# +# GTE control registers (use cfc2/ctc2); same $0-$31 numbering as the data registers above +# +.set C2_R11R12, $0 +.set C2_R13R21, $1 +.set C2_R22R23, $2 +.set C2_R31R32, $3 +.set C2_R33, $4 +.set C2_TRX, $5 +.set C2_TRY, $6 +.set C2_TRZ, $7 + +.set C2_L11L12, $8 +.set C2_L13L21, $9 +.set C2_L22L23, $10 +.set C2_L31L32, $11 +.set C2_L33, $12 +.set C2_RBK, $13 +.set C2_GBK, $14 +.set C2_BBK, $15 + +.set C2_LR1LR2, $16 +.set C2_LR3LG1, $17 +.set C2_LG2LG3, $18 +.set C2_LB1LB2, $19 +.set C2_LB3, $20 +.set C2_RFC, $21 +.set C2_GFC, $22 +.set C2_BFC, $23 + +.set C2_OFX, $24 +.set C2_OFY, $25 +.set C2_H, $26 +.set C2_DQA, $27 +.set C2_DQB, $28 +.set C2_ZSF3, $29 +.set C2_ZSF4, $30 +.set C2_FLAG, $31 diff --git a/libmeidogte/hirotmatrix.c 
b/libmeidogte/hirotmatrix.c new file mode 100644 index 0000000..5a252ff --- /dev/null +++ b/libmeidogte/hirotmatrix.c @@ -0,0 +1,35 @@ +#include <meidogte.h> + +MATRIX *HiRotMatrix(VECTOR *r, MATRIX *m) { + + short s[3],c[3]; + MATRIX tm[3]; + + s[0] = hisin(r->vx); s[1] = hisin(r->vy); s[2] = hisin(r->vz); + c[0] = hicos(r->vx); c[1] = hicos(r->vy); c[2] = hicos(r->vz); + + // mX: rotation about the X axis, built directly in the output matrix + m->m[0][0] = ONE; m->m[0][1] = 0; m->m[0][2] = 0; + m->m[1][0] = 0; m->m[1][1] = c[0]; m->m[1][2] = -s[0]; + m->m[2][0] = 0; m->m[2][1] = s[0]; m->m[2][2] = c[0]; + + // mY: rotation about the Y axis + tm[0].m[0][0] = c[1]; tm[0].m[0][1] = 0; tm[0].m[0][2] = s[1]; + tm[0].m[1][0] = 0; tm[0].m[1][1] = ONE; tm[0].m[1][2] = 0; + tm[0].m[2][0] = -s[1]; tm[0].m[2][1] = 0; tm[0].m[2][2] = c[1]; + + // mZ: rotation about the Z axis + tm[1].m[0][0] = c[2]; tm[1].m[0][1] = -s[2]; tm[1].m[0][2] = 0; + tm[1].m[1][0] = s[2]; tm[1].m[1][1] = c[2]; tm[1].m[1][2] = 0; + tm[1].m[2][0] = 0; tm[1].m[2][1] = 0; tm[1].m[2][2] = ONE; + + PushMatrix(); // MulMatrix0 overwrites the GTE rotation registers; save them first + + MulMatrix0( m, &tm[0], &tm[2] ); // tm[2] = product of mX and mY + MulMatrix0( &tm[2], &tm[1], m ); // m = product of tm[2] and mZ + + PopMatrix(); // restore the saved GTE matrix + + return m; + +} diff --git a/libmeidogte/hisin.c b/libmeidogte/hisin.c new file mode 100644 index 0000000..df03194 --- /dev/null +++ b/libmeidogte/hisin.c @@ -0,0 +1,33 @@ +/* Based on isin_S4 implementation from coranac: + * http://www.coranac.com/2009/07/sines/ + * Input: 131072 units = 360 degrees (see meidogte.h); result is scaled by 1<<qA. + */ + +#define qN 15 // 1<<qN = 32768 is a quarter turn (hicos adds 32768) +#define qA 12 // output scale: 1<<qA = 4096 +#define B 19900 +#define C 3516 + +int hisin(int x) { + + int c, y; + + c= x<<(30-qN); // Semi-circle info into carry. + x -= 1<<qN; // sine -> cosine calc + + x= x<<(31-qN); // Mask with PI + x= x>>(31-qN); // Note: SIGNED shift! (to qN) + x= x*x>>(2*qN-14); // x=x^2 To Q14 + + y= B - (x*C>>14); // B - x^2*C + y= (1<<qA)-(x*y>>16); // A - x^2*(B-x^2*C) + + return c>=0 ? 
y : -y; + +} + +int hicos(int x) { + + return hisin( x+32768 ); // quarter-turn phase shift (1<<15) + +} diff --git a/libmeidogte/initgeom.s b/libmeidogte/initgeom.s new file mode 100644 index 0000000..14ca293 --- /dev/null +++ b/libmeidogte/initgeom.s @@ -0,0 +1,45 @@ +.set noreorder + +.include "gtereg.h" + +.section .text + + +.global InitGeom +.type InitGeom, @function +InitGeom: + addiu $sp, -4 + sw $ra, 0($sp) # Preserve $ra across the jal calls below + + jal EnterCriticalSection + nop + + mfc0 $v0, $12 # Get SR + lui $v1, 0x4000 # Set bit to enable cop2 + or $v0, $v1 + mtc0 $v0, $12 # Set new SR + + jal ExitCriticalSection + nop + + ctc2 $0 , $24 # Reset GTE offset + ctc2 $0 , $25 # ($24/$25 are C2_OFX/C2_OFY in gtereg.h) + + li $v0, 320 # Set default projection plane + ctc2 $v0, $26 # $26 is C2_H in gtereg.h + + li $v0, 0x155 # Set ZSF3 and ZSF4 defaults + ctc2 $v0, $29 + li $v0, 0x100 + ctc2 $v0, $30 + + li $v0, 0xef9e # DQA and DQB defaults + lui $v1, 0x0140 + ctc2 $v0, C2_DQA + ctc2 $v1, C2_DQB + + lw $ra, 0($sp) + addiu $sp, 4 + jr $ra + nop + diff --git a/libmeidogte/inline_s.h b/libmeidogte/inline_s.h new file mode 100644 index 0000000..08e5c38 --- /dev/null +++ b/libmeidogte/inline_s.h @@ -0,0 +1,227 @@ +# Inline GTE macros for GNU assembler (as). 
+# +# 2019 Meido-Tek Productions +# + +.macro nRTPS + nop + nop + cop2 0x0180001 +.endm + +.macro nRTPT + nop + nop + cop2 0x0280030 +.endm + +.macro nNCLIP + nop + nop + cop2 0x1400006 +.endm + +.macro nAVSZ3 + nop + nop + cop2 0x158002D +.endm + +.macro nAVSZ4 + nop + nop + cop2 0x168002E +.endm + +.macro nMVMVA sf mx v cv lm + nop + nop + cop2 0x0400012|(\sf<<19)|(\mx<<17)|(\v<<15)|(\cv<<13)|(\lm<<10) +.endm + +.macro nSQR sf + nop + nop + cop2 0x0A00428|(\sf<<19) +.endm + +.macro nnOP sf lm # extra n to prevent conflict with the nop opcode + nop + nop + cop2 0x170000C|(\sf<<19)|(\lm<<10) +.endm + +.macro nNCS + nop + nop + cop2 0x0C8041E +.endm + +.macro nNCT + nop + nop + cop2 0x0D80420 +.endm + +.macro nNCCS + nop + nop + cop2 0x108041B +.endm + +.macro nNCCT + nop + nop + cop2 0x118043F +.endm + +.macro nNCDS + nop + nop + cop2 0x0E80413 +.endm + +.macro nNCDT + nop + nop + cop2 0x0F80416 +.endm + +.macro nCC + nop + nop + cop2 0x138041C +.endm + +.macro nCDP + nop + nop + cop2 0x1280414 +.endm + +.macro nDCPL + nop + nop + cop2 0x0680029 +.endm + +.macro nDPCS + nop + nop + cop2 0x0780010 +.endm + +.macro nDPCT + nop + nop + cop2 0x0F8002A # DPCT command word (the former 0x0180001 is RTPS) +.endm + +.macro nINTPL + nop + nop + cop2 0x0980011 +.endm + +.macro nGPF sf + nop + nop + cop2 0x190003D|(\sf<<19) +.endm + +.macro nGPL sf + nop + nop + cop2 0x1A0003E|(\sf<<19) +.endm + +# +# Macros without leading nops (for optimized usage) +# +.macro RTPS + cop2 0x0180001 +.endm + +.macro RTPT + cop2 0x0280030 +.endm + +.macro NCLIP + cop2 0x1400006 +.endm + +.macro AVSZ3 + cop2 0x158002D +.endm + +.macro AVSZ4 + cop2 0x168002E +.endm + +.macro MVMVA sf mx v cv lm + cop2 0x0400012|(\sf<<19)|(\mx<<17)|(\v<<15)|(\cv<<13)|(\lm<<10) +.endm + +.macro SQR sf + cop2 0x0A00428|(\sf<<19) +.endm + +.macro OP sf lm + cop2 0x170000C|(\sf<<19)|(\lm<<10) +.endm + +.macro NCS + cop2 0x0C8041E +.endm + +.macro NCT + cop2 0x0D80420 +.endm + +.macro NCCS + cop2 0x108041B +.endm + +.macro NCCT + cop2 0x118043F +.endm + +.macro NCDS + 
cop2 0x0E80413 +.endm + +.macro NCDT + cop2 0x0F80416 +.endm + +.macro CC + cop2 0x138041C +.endm + +.macro CDP + cop2 0x1280414 +.endm + +.macro DCPL + cop2 0x0680029 +.endm + +.macro DPCS + cop2 0x0780010 +.endm + +.macro DPCT + cop2 0x0F8002A # DPCT command word (the former 0x0180001 is RTPS) +.endm + +.macro INTPL + cop2 0x0980011 +.endm + +.macro GPF sf + cop2 0x190003D|(\sf<<19) +.endm + +.macro GPL sf + cop2 0x1A0003E|(\sf<<19) +.endm diff --git a/libmeidogte/isin.c b/libmeidogte/isin.c new file mode 100644 index 0000000..3641efd --- /dev/null +++ b/libmeidogte/isin.c @@ -0,0 +1,34 @@ +/* Based on isin_S4 implementation from coranac: + * http://www.coranac.com/2009/07/sines/ + * Input: 4096 units = 360 degrees (see meidogte.h); result is scaled by 1<<qA. + */ + +#define qN 10 // 1<<qN = 1024 is a quarter turn (icos adds 1024) +#define qA 12 // output scale: 1<<qA = 4096 +#define B 19900 +#define C 3516 + +int isin(int x) { + + int c, y; + + c= x<<(30-qN); // Semi-circle info into carry. + x -= 1<<qN; // sine -> cosine calc + + x= x<<(31-qN); // Mask with PI + x= x>>(31-qN); // Note: SIGNED shift! (to qN) + + x= x*x>>(2*qN-14); // x=x^2 To Q14 + + y= B - (x*C>>14); // B - x^2*C + y= (1<<qA)-(x*y>>16); // A - x^2*(B-x^2*C) + + return c>=0 ? 
y : -y; + +} + +int icos(int x) { + + return isin( x+1024 ); // quarter-turn phase shift (1<<10) + +} diff --git a/libmeidogte/matrix.c b/libmeidogte/matrix.c new file mode 100644 index 0000000..1c226e1 --- /dev/null +++ b/libmeidogte/matrix.c @@ -0,0 +1,45 @@ +#include <meidogte.h> + +MATRIX *RotMatrix(SVECTOR *r, MATRIX *m) { + + short s[3],c[3]; + MATRIX tm[3]; + + s[0] = isin(r->vx); s[1] = isin(r->vy); s[2] = isin(r->vz); + c[0] = icos(r->vx); c[1] = icos(r->vy); c[2] = icos(r->vz); + + // mX: rotation about the X axis, built directly in the output matrix + m->m[0][0] = ONE; m->m[0][1] = 0; m->m[0][2] = 0; + m->m[1][0] = 0; m->m[1][1] = c[0]; m->m[1][2] = -s[0]; + m->m[2][0] = 0; m->m[2][1] = s[0]; m->m[2][2] = c[0]; + + // mY: rotation about the Y axis + tm[0].m[0][0] = c[1]; tm[0].m[0][1] = 0; tm[0].m[0][2] = s[1]; + tm[0].m[1][0] = 0; tm[0].m[1][1] = ONE; tm[0].m[1][2] = 0; + tm[0].m[2][0] = -s[1]; tm[0].m[2][1] = 0; tm[0].m[2][2] = c[1]; + + // mZ: rotation about the Z axis + tm[1].m[0][0] = c[2]; tm[1].m[0][1] = -s[2]; tm[1].m[0][2] = 0; + tm[1].m[1][0] = s[2]; tm[1].m[1][1] = c[2]; tm[1].m[1][2] = 0; + tm[1].m[2][0] = 0; tm[1].m[2][1] = 0; tm[1].m[2][2] = ONE; + + PushMatrix(); // MulMatrix0 overwrites the GTE rotation registers; save them first + + MulMatrix0( m, &tm[0], &tm[2] ); // tm[2] = product of mX and mY + MulMatrix0( &tm[2], &tm[1], m ); // m = product of tm[2] and mZ + + PopMatrix(); // restore the saved GTE matrix + + return m; + +} + +MATRIX *TransMatrix(MATRIX *m, VECTOR *r) { + + m->t[0] = r->vx; + m->t[1] = r->vy; + m->t[2] = r->vz; + + return m; + +} diff --git a/libmeidogte/meidogte.h b/libmeidogte/meidogte.h new file mode 100644 index 0000000..3953701 --- /dev/null +++ b/libmeidogte/meidogte.h @@ -0,0 +1,170 @@ +#ifndef _MEIDOGTE_H +#define _MEIDOGTE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <meidogte_inline.h> + +/** + * Fixed-point representation of 1.0 (1.0 = 4096) + */ +#define ONE 4096 + + +typedef struct { + short m[3][3]; + int t[3]; +} MATRIX; + +typedef struct { + int vx, vy, vz; +} VECTOR; + +typedef struct { + short vx, vy, vz, pad; +} SVECTOR; + +typedef struct { + unsigned char r, g, b, cd; +} CVECTOR; + +/** + * Initialize MeidoGTE library + */ + +void InitGeom(); + +/** + * Integer sine function (4096 = 360 degrees) + * @param a Input + * 
@return Sine of input, scaled so that 1.0 = 4096 + */ +int isin(int a); + +/** + * Integer cosine function (4096 = 360 degrees) + * @param a Input + * @return Cosine of input, scaled so that 1.0 = 4096 + */ +int icos(int a); + +/** + * Higher precision integer sine function (131072 = 360 degrees) + * @param a Input + * @return Sine of input, scaled so that 1.0 = 4096 + */ +int hisin(int a); +/** + * Higher precision integer cosine function (131072 = 360 degrees) + * @param a Input + * @return Cosine of input, scaled so that 1.0 = 4096 + */ +int hicos(int a); + +/** + * Save the GTE rotation matrix and translation vector in a single static slot (calls do not nest). + */ +void PushMatrix(); + +/** + * Restore the GTE rotation matrix and translation vector saved by PushMatrix(). + */ +void PopMatrix(); + +/** + * Find rotation matrix from a rotation angle. (4096 = 360 degrees) + * @param r Rotation angle (input) + * @param m Rotation matrix (output) + * @return Pointer to m + */ + +MATRIX *RotMatrix(SVECTOR *r, MATRIX *m); + +/** + * Find rotation matrix from a rotation angle. (high-precision) (131072 = 360 degrees) + * @param r Rotation angle (input) + * @param m Rotation matrix (output) + * @return Pointer to m + */ +MATRIX *HiRotMatrix(VECTOR *r, MATRIX *m); + +/** + * Give an amount of parallel transfer expressed by r to the matrix m. + * @param m Pointer to matrix (output) + * @param r Pointer to transfer vector (input) + * @return Pointer to m + */ +MATRIX *TransMatrix(MATRIX *m, VECTOR *r); +/** + * Scale m by s. + * @param m Pointer to matrix (output) + * @param s Pointer to scale vector (input) + * @return Pointer to m + */ +MATRIX *ScaleMatrix(MATRIX *m, VECTOR *s); + +/** + * Multiply two matrices. + * @param m0 First matrix (result is saved here) + * @param m1 Second matrix + * @return Pointer to m0. + */ +MATRIX *MulMatrix(MATRIX *m0, MATRIX *m1); +/** + * Multiply two matrices. + * @param m0 First matrix + * @param m1 Second matrix + * @param m2 Output matrix + * @return Pointer to m2 + */ +MATRIX *MulMatrix0(MATRIX *m0, MATRIX *m1, MATRIX *m2); +/** + * Make a composite coordinate transformation matrix. 
+ * @param v0 First matrix + * @param v1 Second matrix + * @param v2 Output matrix + * @return Pointer to v2 + */ +MATRIX *CompMatrixLV(MATRIX *v0, MATRIX *v1, MATRIX *v2); +/** + * Multiply a vector by a matrix. + * @param m Pointer to matrix to be multiplied + * @param v0 Pointer to vector (input) + * @param v1 Pointer to vector (output) + * @return Pointer to v1 + */ +VECTOR *ApplyMatrixLV(MATRIX *m, VECTOR *v0, VECTOR *v1); +/** + * Normalize a vector. + * Warning: if ((v0->vx)^2 + (v0->vy)^2 + (v0->vz)^2) > 0x7FFFFFF, + * a processor exception will occur. + * @param v0 Pointer to vector (input) + * @param v1 Pointer to vector (output) + */ +void VectorNormalS(VECTOR *v0, SVECTOR *v1); +/** + * Return a vector, obtained by squaring each term of the vector v0, to v1. + * @param v0 Pointer to vector (input) + * @param v1 Pointer to vector (output) + */ +void Square0(VECTOR *v0, VECTOR *v1); +/** + * Square root + * @param a Input value + * @return Square root of input value + */ +int SquareRoot0(int a); +/** + * Square root + * @param a Input value in (0, 20, 12) format + * @return Square root of input value in (0, 20, 12) format + */ +int SquareRoot12(int a); + +#ifdef __cplusplus +} +#endif + +#endif // _MEIDOGTE_H diff --git a/libmeidogte/meidogte_inline.h b/libmeidogte/meidogte_inline.h new file mode 100644 index 0000000..ab03702 --- /dev/null +++ b/libmeidogte/meidogte_inline.h @@ -0,0 +1,433 @@ +/* Inline GTE macros for the GNU C compiler. + * + * 2019 Meido-Tek Productions + * + * + * + * Todo: A couple of GTE operation macros are still missing such as + * gte_rtv*() though they appear to be just variants of gte_mvmva more or + * less (gte_rtv0() is actually gte_mvmva(1, 0, 0, 3, 0) for example). 
+ * + */ + +#ifndef _MEIDOGTE_INLINE_C_H +#define _MEIDOGTE_INLINE_C_H + +/** + * GTE load macros ("memory" clobber: the asm reads through the pointer operand; scratch $t registers are listed as clobbers) + */ + +/** + * Load a SVECTOR (passed as a pointer) to GTE V0 + */ +#define gte_ldv0( r0 ) __asm__ volatile ( \ + "lwc2 $0 , 0( %0 );" \ + "lwc2 $1 , 4( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +/** + * Load a SVECTOR (passed as a pointer) to GTE V1 + */ +#define gte_ldv1( r0 ) __asm__ volatile ( \ + "lwc2 $2 , 0( %0 );" \ + "lwc2 $3 , 4( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +/** + * Load a SVECTOR (passed as a pointer) to GTE V2 + */ +#define gte_ldv2( r0 ) __asm__ volatile ( \ + "lwc2 $4 , 0( %0 );" \ + "lwc2 $5 , 4( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +/** + * Load three SVECTORs (passed as a pointer) to the GTE at once + */ +#define gte_ldv3( r0, r1, r2 ) __asm__ volatile ( \ + "lwc2 $0 , 0( %0 );" \ + "lwc2 $1 , 4( %0 );" \ + "lwc2 $2 , 0( %1 );" \ + "lwc2 $3 , 4( %1 );" \ + "lwc2 $4 , 0( %2 );" \ + "lwc2 $5 , 4( %2 );" \ + : \ + : "r"( r0 ), "r"( r1 ), "r"( r2 ) : "memory" ) + +#define gte_ldrgb( r0 ) __asm__ volatile ( \ + "lwc2 $6 , 0( %0 );" \ + : \ + : "r"( r0 ) : "memory" ) + +#define gte_ldopv2( r0 ) __asm__ volatile ( \ + "lwc2 $11, 8( %0 );" \ + "lwc2 $9 , 0( %0 );" \ + "lwc2 $10, 4( %0 );" \ + : \ + : "r"( r0 ) : "memory" ) + +/** + * Sets the GTE offset + */ +#define gte_SetGeomOffset( r0, r1 ) __asm__ volatile ( \ + "sll $t0, %0, 16;" \ + "sll $t1, %1, 16;" \ + "ctc2 $t0, $24;" \ + "ctc2 $t1, $25;" \ + : \ + : "r"( r0 ), "r"( r1 ) \ + : "$t0", "$t1" ) + +#define gte_SetGeomScreen( r0 ) __asm__ volatile ( \ + "ctc2 %0, $26;" \ + : \ + : "r"( r0 ) ) + +#define gte_SetTransMatrix( r0 ) __asm__ volatile ( \ + "lw $t0, 20( %0 );" \ + "lw $t1, 24( %0 );" \ + "ctc2 $t0, $5;" \ + "lw $t2, 28( %0 );" \ + "ctc2 $t1, $6;" \ + "ctc2 $t2, $7;" \ + : \ + : "r"( r0 ) \ + : "$t0", "$t1", "$t2", "memory" ) /* all three scratch registers are written */ + +#define gte_SetRotMatrix( r0 ) __asm__ volatile ( \ + "lw $t0, 0( %0 );" \ + "lw $t1, 4( %0 );" \ + "ctc2 $t0, $0;" \ + "ctc2 $t1, $1;" \ + "lw $t0, 8( %0 );" \ + "lw $t1, 12( %0 );" \ + "lhu $t2, 
16( %0 );" \ + "ctc2 $t0, $2;" \ + "ctc2 $t1, $3;" \ + "ctc2 $t2, $4;" \ + : \ + : "r"( r0 ) \ + : "$t0", "$t1", "$t2", "memory" ) /* all three scratch registers are written */ + +#define gte_SetLightMatrix( r0 ) __asm__ volatile ( \ + "lw $t0, 0( %0 );" \ + "lw $t1, 4( %0 );" \ + "ctc2 $t0, $8;" \ + "ctc2 $t1, $9;" \ + "lw $t0, 8( %0 );" \ + "lw $t1, 12( %0 );" \ + "lhu $t2, 16( %0 );" \ + "ctc2 $t0, $10;" \ + "ctc2 $t1, $11;" \ + "ctc2 $t2, $12;" \ + : \ + : "r"( r0 ) \ + : "$t0", "$t1", "$t2", "memory" ) /* all three scratch registers are written */ + +#define gte_SetColorMatrix( r0 ) __asm__ volatile ( \ + "lw $t0, 0( %0 );" \ + "lw $t1, 4( %0 );" \ + "ctc2 $t0, $16;" \ + "ctc2 $t1, $17;" \ + "lw $t0, 8( %0 );" \ + "lw $t1, 12( %0 );" \ + "lhu $t2, 16( %0 );" \ + "ctc2 $t0, $18;" \ + "ctc2 $t1, $19;" \ + "ctc2 $t2, $20;" \ + : \ + : "r"( r0 ) \ + : "$t0", "$t1", "$t2", "memory" ) /* all three scratch registers are written */ + +#define gte_SetBackColor( r0, r1, r2 ) __asm__ volatile ( \ + "sll $t0, %0, 4;" \ + "sll $t1, %1, 4;" \ + "sll $t2, %2, 4;" \ + "ctc2 $t0, $13;" \ + "ctc2 $t1, $14;" \ + "ctc2 $t2, $15;" \ + : \ + : "r"( r0 ), "r"( r1 ), "r"( r2 ) \ + : "$t0", "$t1", "$t2" ) + +/** + * GTE store macros + */ + +#define gte_otz( r0 ) __asm__ volatile ( \ + "swc2 $7, 0( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +#define gte_stflg( r0 ) __asm__ volatile ( \ + "cfc2 $t0, $31;" \ + "nop;" \ + "sw $t0, 0( %0 );" \ + : \ + : "r"( r0 ) \ + : "$t0", "memory" ) /* $t0 carries the flag word */ + +#define gte_stsxy( r0 ) __asm__ volatile ( \ + "swc2 $14, 0( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +#define gte_stsxy0( r0 ) __asm__ volatile ( \ + "swc2 $12, 0( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +#define gte_stsxy1( r0 ) __asm__ volatile ( \ + "swc2 $13, 0( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +#define gte_stsxy2( r0 ) __asm__ volatile ( \ + "swc2 $14, 0( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +#define gte_stsxy3( r0, r1, r2 ) __asm__ volatile ( \ + "swc2 $12, 0( %0 );" \ + "swc2 $13, 0( %1 );" \ + "swc2 $14, 0( %2 );" \ + : \ + : "r"( r0 ), "r"( r1 ), "r"( r2 ) \ + : "memory" ) + +#define gte_stotz( r0 ) __asm__ volatile ( \ + "swc2 $7, 0( %0 );" \ + : \ + : "r"( r0 
) \ + : "memory" ) + +#define gte_stopz( r0 ) __asm__ volatile ( \ + "swc2 $24, 0( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +#define gte_strgb( r0 ) __asm__ volatile ( \ + "swc2 $22, 0( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +#define gte_strgb3( r0, r1, r2 ) __asm__ volatile ( \ + "swc2 $20, 0( %0 );" \ + "swc2 $21, 0( %1 );" \ + "swc2 $22, 0( %2 );" \ + : \ + : "r"( r0 ), "r"( r1 ), "r" ( r2 ) \ + : "memory" ) + +#define gte_stsv( r0 ) __asm__ volatile ( \ + "mfc2 $t0, $9;" \ + "mfc2 $t1, $10;" \ + "mfc2 $t2, $11;" \ + "sh $t0, 0( %0 );" \ + "sh $t1, 2( %0 );" \ + "sh $t2, 4( %0 );" \ + : \ + : "r"( r0 ) \ + : "$t0", "$t1", "$t2", "memory" ) /* scratch registers carry IR1-IR3 */ + +#define gte_stlvnl( r0 ) __asm__ volatile ( \ + "swc2 $25, 0( %0 );" \ + "swc2 $26, 4( %0 );" \ + "swc2 $27, 8( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + + +/** + * GTE operation macros + */ + +#define gte_rtps() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0180001;" ) + +#define gte_rtpt() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0280030;" ) + +#define gte_nclip() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x1400006;" ) + +#define gte_avsz3() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x158002D;" ) + +#define gte_avsz4() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x168002E;" ) + +#define gte_sqr0() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0A00428;" ) + +#define gte_sqr12() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0A80428;" ) + +#define gte_op0() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x170000C;" ) + +#define gte_op12() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x178000C;" ) + +#define gte_ncs() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0C8041E;" ) + +#define gte_nct() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0D80420;" ) + +#define gte_nccs() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x108041B;" ) + +#define gte_ncct() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x118043F;" ) + +#define 
gte_ncds() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0E80413;" ) + +#define gte_ncdt() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0F80416;" ) + +#define gte_cc() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x138041C;" ) + +#define gte_cdp() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x1280414;" ) + +#define gte_dcpl() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0680029;" ) + +#define gte_dpcs() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0780010;" ) + +#define gte_dpct() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0F8002A;" ) /* DPCT command word (the former 0x0180001 is RTPS) */ + +#define gte_intpl() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0980011;" ) + +#define gte_gpf0() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x190003D;" ) + +#define gte_gpf12() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x198003D;" ) + +#define gte_gpl0() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x1A0003E;" ) + +#define gte_gpl12() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x1A8003E;" ) + +#define gte_mvmva_core( r0 ) __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 %0" \ + : \ + : "g"( r0 ) ) + +#define gte_mvmva(sf, mx, v, cv, lm) gte_mvmva_core( 0x0400012 | \ + ((sf)<<19) | ((mx)<<17) | ((v)<<15) | ((cv)<<13) | ((lm)<<10) ) + + +/** + * GTE operation macros without leading nops + * + * Checking assembler output when using these is advised. 
+ */ + +#define gte_rtps_b() __asm__ volatile ( "cop2 0x0180001;" ) +#define gte_rtpt_b() __asm__ volatile ( "cop2 0x0280030;" ) +#define gte_nclip_b() __asm__ volatile ( "cop2 0x1400006;" ) +#define gte_avsz3_b() __asm__ volatile ( "cop2 0x158002D;" ) +#define gte_avsz4_b() __asm__ volatile ( "cop2 0x168002E;" ) +#define gte_sqr0_b() __asm__ volatile ( "cop2 0x0A00428;" ) +#define gte_sqr12_b() __asm__ volatile ( "cop2 0x0A80428;" ) +#define gte_op0_b() __asm__ volatile ( "cop2 0x170000C;" ) +#define gte_op12_b() __asm__ volatile ( "cop2 0x178000C;" ) +#define gte_ncs_b() __asm__ volatile ( "cop2 0x0C8041E;" ) +#define gte_nct_b() __asm__ volatile ( "cop2 0x0D80420;" ) +#define gte_nccs_b() __asm__ volatile ( "cop2 0x108041B;" ) +#define gte_ncct_b() __asm__ volatile ( "cop2 0x118043F;" ) +#define gte_ncds_b() __asm__ volatile ( "cop2 0x0E80413;" ) +#define gte_ncdt_b() __asm__ volatile ( "cop2 0x0F80416;" ) +#define gte_cc_b() __asm__ volatile ( "cop2 0x138041C;" ) +#define gte_cdp_b() __asm__ volatile ( "cop2 0x1280414;" ) +#define gte_dcpl_b() __asm__ volatile ( "cop2 0x0680029;" ) +#define gte_dpcs_b() __asm__ volatile ( "cop2 0x0780010;" ) +#define gte_dpct_b() __asm__ volatile ( "cop2 0x0F8002A;" ) /* DPCT command word (the former 0x0180001 is RTPS) */ +#define gte_intpl_b() __asm__ volatile ( "cop2 0x0980011;" ) +#define gte_gpf0_b() __asm__ volatile ( "cop2 0x190003D;" ) +#define gte_gpf12_b() __asm__ volatile ( "cop2 0x198003D;" ) +#define gte_gpl0_b() __asm__ volatile ( "cop2 0x1A0003E;" ) +#define gte_gpl12_b() __asm__ volatile ( "cop2 0x1A8003E;" ) +#define gte_mvmva_core_b( r0 ) __asm__ volatile ( \ + "cop2 %0" \ + : \ + : "g"( r0 ) ) +#define gte_mvmva_b(sf, mx, v, cv, lm) gte_mvmva_core_b( 0x0400012 | \ + ((sf)<<19) | ((mx)<<17) | ((v)<<15) | ((cv)<<13) | ((lm)<<10) ) + +#endif // _MEIDOGTE_INLINE_C_H
\ No newline at end of file diff --git a/libmeidogte/mulmatrix.s b/libmeidogte/mulmatrix.s new file mode 100644 index 0000000..19dabe8 --- /dev/null +++ b/libmeidogte/mulmatrix.s @@ -0,0 +1,74 @@ +.set noreorder + +.include "gtereg.h" +.include "inline_s.h" + +.section .text + + +.global MulMatrix +.type MulMatrix, @function +MulMatrix: + + # Load m1 (a1) to GTE; NOTE(review): MulMatrix0 loads its first argument here instead - confirm operand order + lw $t0, 0($a1) + lw $t1, 4($a1) + ctc2 $t0, $0 + ctc2 $t1, $1 + lw $t0, 8($a1) + lw $t1, 12($a1) + lhu $t2, 16($a1) + ctc2 $t0, $2 + ctc2 $t1, $3 + ctc2 $t2, $4 + + lhu $t1, 2*(0+(3*1))($a0) # Load values for first + lhu $t0, 2*(0+(3*0))($a0) # R11 R21 R31 + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(0+(3*2))($a0) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + lhu $t1, 2*(1+(3*1))($a0) # Load values for second + lhu $t0, 2*(1+(3*0))($a0) # R12 R22 R32 + MVMVA(1, 0, 0, 3, 0) # First multiply + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(1+(3*2))($a0) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + mfc2 $t0, C2_IR1 # Store results of first + mfc2 $t1, C2_IR2 + sh $t0, 2*(0+(3*0))($a0) + mfc2 $t0, C2_IR3 + sh $t1, 2*(0+(3*1))($a0) + sh $t0, 2*(0+(3*2))($a0) + + lhu $t1, 2*(2+(3*1))($a0) # Load values for third + lhu $t0, 2*(2+(3*0))($a0) # R13 R23 R33 + MVMVA(1, 0, 0, 3, 0) # Second multiply + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(2+(3*2))($a0) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + mfc2 $t0, C2_IR1 # Store results of second + mfc2 $t1, C2_IR2 + sh $t0, 2*(1+(3*0))($a0) + mfc2 $t0, C2_IR3 + sh $t1, 2*(1+(3*1))($a0) + sh $t0, 2*(1+(3*2))($a0) + MVMVA(1, 0, 0, 3, 0) # Third multiply + + mfc2 $t0, C2_IR1 # Store results of third + mfc2 $t1, C2_IR2 + sh $t0, 2*(2+(3*0))($a0) + mfc2 $t0, C2_IR3 + sh $t1, 2*(2+(3*1))($a0) + sh $t0, 2*(2+(3*2))($a0) + + jr $ra + move $v0, $a0 # jr delay slot: return a0 diff --git a/libmeidogte/mulmatrix0.s b/libmeidogte/mulmatrix0.s new file mode 100644 index 0000000..874226b --- /dev/null +++ b/libmeidogte/mulmatrix0.s @@ -0,0 +1,74 @@ +.set noreorder + +.include "gtereg.h" +.include "inline_s.h" + +.section .text + + 
+.global MulMatrix0 +.type MulMatrix0, @function +MulMatrix0: + + # Load m0 (a0) to GTE + lw $t0, 0($a0) + lw $t1, 4($a0) + ctc2 $t0, $0 + ctc2 $t1, $1 + lw $t0, 8($a0) + lw $t1, 12($a0) + lhu $t2, 16($a0) + ctc2 $t0, $2 + ctc2 $t1, $3 + ctc2 $t2, $4 + + lhu $t1, 2*(0+(3*1))($a1) # Load values for first + lhu $t0, 2*(0+(3*0))($a1) # R11 R21 R31 + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(0+(3*2))($a1) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + lhu $t1, 2*(1+(3*1))($a1) # Load values for second + lhu $t0, 2*(1+(3*0))($a1) # R12 R22 R32 + MVMVA(1, 0, 0, 3, 0) # First multiply + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(1+(3*2))($a1) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + mfc2 $t0, C2_IR1 # Store results of first + mfc2 $t1, C2_IR2 + sh $t0, 2*(0+(3*0))($a2) + mfc2 $t0, C2_IR3 + sh $t1, 2*(0+(3*1))($a2) + sh $t0, 2*(0+(3*2))($a2) + + lhu $t1, 2*(2+(3*1))($a1) # Load values for third + lhu $t0, 2*(2+(3*0))($a1) # R13 R23 R33 + MVMVA(1, 0, 0, 3, 0) # Second multiply + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(2+(3*2))($a1) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + mfc2 $t0, C2_IR1 # Store results of second + mfc2 $t1, C2_IR2 + sh $t0, 2*(1+(3*0))($a2) + mfc2 $t0, C2_IR3 + sh $t1, 2*(1+(3*1))($a2) + sh $t0, 2*(1+(3*2))($a2) + MVMVA(1, 0, 0, 3, 0) # Third multiply + + mfc2 $t0, C2_IR1 # Store results of third + mfc2 $t1, C2_IR2 + sh $t0, 2*(2+(3*0))($a2) + mfc2 $t0, C2_IR3 + sh $t1, 2*(2+(3*1))($a2) + sh $t0, 2*(2+(3*2))($a2) + + jr $ra + move $v0, $a2 # jr delay slot: return a2 diff --git a/libmeidogte/pushpopmatrix.s b/libmeidogte/pushpopmatrix.s new file mode 100644 index 0000000..d10687a --- /dev/null +++ b/libmeidogte/pushpopmatrix.s @@ -0,0 +1,68 @@ +.set noreorder + +.include "gtereg.h" +.include "inline_s.h" + +.section .text + + +.global PushMatrix +.type PushMatrix, @function +PushMatrix: + la $a0, _matrix_stack # Single save slot: rotation (5 words) + translation (3 words) + cfc2 $v0, C2_R11R12 + cfc2 $v1, C2_R13R21 + sw $v0, 0($a0) + cfc2 $v0, C2_R22R23 + sw $v1, 4($a0) + sw $v0, 8($a0) + cfc2 $v0, C2_R31R32 + cfc2 $v1, C2_R33 + sw $v0, 12($a0) + sw $v1, 
16($a0) + cfc2 $v0, C2_TRX + cfc2 $v1, C2_TRY + sw $v0, 20($a0) + cfc2 $v0, C2_TRZ + sw $v1, 24($a0) + jr $ra + sw $v0, 28($a0) # jr delay slot: final store + +.global PopMatrix +.type PopMatrix, @function +PopMatrix: + la $a0, _matrix_stack # Reload what PushMatrix stored + lw $v0, 0($a0) + lw $v1, 4($a0) + ctc2 $v0, C2_R11R12 + ctc2 $v1, C2_R13R21 + lw $v0, 8($a0) + lw $v1, 12($a0) + ctc2 $v0, C2_R22R23 + lw $v0, 16($a0) + ctc2 $v1, C2_R31R32 + ctc2 $v0, C2_R33 + lw $v0, 20($a0) + lw $v1, 24($a0) + ctc2 $v0, C2_TRX + lw $v0, 28($a0) + ctc2 $v1, C2_TRY + ctc2 $v0, C2_TRZ + jr $ra + nop + + +.section .data + + +.type _matrix_stack, @object +_matrix_stack: + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + diff --git a/libmeidogte/scalematrix.s b/libmeidogte/scalematrix.s new file mode 100644 index 0000000..3e83800 --- /dev/null +++ b/libmeidogte/scalematrix.s @@ -0,0 +1,68 @@ +.set noreorder + +.include "gtereg.h" +.include "inline_s.h" + +.section .text + + +.global ScaleMatrix +.type ScaleMatrix, @function +ScaleMatrix: + + lwc2 C2_IR0, 0($a1) # X + + lh $v0, 2*(0+(3*0))($a0) # Column 0 of the matrix goes through IR1-IR3 + lh $v1, 2*(0+(3*1))($a0) + mtc2 $v0, C2_IR1 + lh $v0, 2*(0+(3*2))($a0) + mtc2 $v1, C2_IR2 + mtc2 $v0, C2_IR3 + + nGPF(1) + + mfc2 $v0, C2_IR1 + mfc2 $v1, C2_IR2 + sh $v0, 2*(0+(3*0))($a0) + mfc2 $v0, C2_IR3 + sh $v1, 2*(0+(3*1))($a0) + sh $v0, 2*(0+(3*2))($a0) + + lwc2 C2_IR0, 4($a1) # Y + + lh $v0, 2*(1+(3*0))($a0) # Column 1 + lh $v1, 2*(1+(3*1))($a0) + mtc2 $v0, C2_IR1 + lh $v0, 2*(1+(3*2))($a0) + mtc2 $v1, C2_IR2 + mtc2 $v0, C2_IR3 + + nGPF(1) + + mfc2 $v0, C2_IR1 + mfc2 $v1, C2_IR2 + sh $v0, 2*(1+(3*0))($a0) + mfc2 $v0, C2_IR3 + sh $v1, 2*(1+(3*1))($a0) + sh $v0, 2*(1+(3*2))($a0) + + lwc2 C2_IR0, 8($a1) # Z + + lh $v0, 2*(2+(3*0))($a0) # Column 2 + lh $v1, 2*(2+(3*1))($a0) + mtc2 $v0, C2_IR1 + lh $v0, 2*(2+(3*2))($a0) + mtc2 $v1, C2_IR2 + mtc2 $v0, C2_IR3 + + nGPF(1) + + mfc2 $v0, C2_IR1 + mfc2 $v1, C2_IR2 + sh $v0, 2*(2+(3*0))($a0) + mfc2 $v0, C2_IR3 + sh $v1, 2*(2+(3*1))($a0) + sh $v0, 2*(2+(3*2))($a0) + + jr $ra + move $v0, $a0 # jr delay slot: return a0 diff 
--git a/libmeidogte/square0.s b/libmeidogte/square0.s new file mode 100644 index 0000000..d037b7e --- /dev/null +++ b/libmeidogte/square0.s @@ -0,0 +1,27 @@ +.set noreorder + +.include "gtereg.h" +.include "inline_s.h" + +.section .text + + +.global Square0 +.type Square0, @function +Square0: + + # a0 - Pointer to input vector (v0) + # a1 - Pointer to output vector (v1) + # Loads three words into IR1..IR3, issues nSQR(0) (macro from + # inline_s.h) and stores IR1..IR3 to the output as three words. + + lwc2 C2_IR1, 0($a0) + lwc2 C2_IR2, 4($a0) + lwc2 C2_IR3, 8($a0) + + nSQR(0) + + swc2 C2_IR1, 0($a1) + swc2 C2_IR2, 4($a1) + swc2 C2_IR3, 8($a1) + + jr $ra + nop diff --git a/libmeidogte/squareroot.s b/libmeidogte/squareroot.s new file mode 100644 index 0000000..af095a2 --- /dev/null +++ b/libmeidogte/squareroot.s @@ -0,0 +1,121 @@ +.set noreorder + +.include "gtereg.h" +.include "inline_s.h" + +.section .text + +.global SquareRoot12 +.type SquareRoot12, @function +SquareRoot12: + # a0 - Input value; result is returned in v0 (0 when LZCR reads 32). + # The count read back from LZCR, with its low bit cleared ($t2), is used + # to shift a0 so that (value - 64) indexes the halfword sqrt_table, and + # to derive $t1 = (19 - $t2) >> 1. The table entry is shifted left by + # $t1, or right by -$t1 when $t1 is negative. + # The andi result in $t0 is not read anywhere in this routine. + mtc2 $a0, C2_LZCS + nop + nop + mfc2 $v0, C2_LZCR + beq $v0, 32, $bad_sqr12 + nop + andi $t0, $v0, 0x1 + addiu $v1, $0 , -2 + and $t2, $v0, $v1 + li $t1, 19 + sub $t1, $t2 + sra $t1, 1 + addi $t3, $t2, -24 + bltz $t3, $value_less12 + nop + sllv $t4, $a0, $t3 + b $value_greater12 +$value_less12: + # Also executes as the delay slot of the b above; $t3 is not read + # again on that path. + addiu $t3, $0 , 24 + sub $t3, $t2 + srav $t4, $a0, $t3 +$value_greater12: + addi $t4, -64 + sll $t4, 1 + la $t5, sqrt_table + addu $t5, $t4 + lh $t5, 0($t5) + nop + + bltz $t1, $1594c + nop + jr $ra + sllv $v0, $t5, $t1 + +$1594c: + + sub $t1, $0 , $t1 + jr $ra + srl $v0, $t5, $t1 + +$bad_sqr12: + jr $ra + move $v0, $0 + + +.global SquareRoot0 +.type SquareRoot0, @function +SquareRoot0: + # a0 - Input value; result is returned in v0 (0 when LZCR reads 32). + # Same table lookup as SquareRoot12, but with $t1 = (31 - $t2) >> 1; + # the table entry is shifted left by $t1 and then right by 12. + # The andi result in $t0 is not read anywhere in this routine. + mtc2 $a0, C2_LZCS + nop + nop + mfc2 $v0, C2_LZCR + beq $v0, 32, $bad_sqr + nop + andi $t0, $v0, 0x1 + addiu $v1, $0 , -2 + and $t2, $v0, $v1 + li $t1, 31 + sub $t1, $t2 + sra $t1, 1 + addi $t3, $t2, -24 + bltz $t3, $value_less + nop + sllv $t4, $a0, $t3 + b $value_greater +$value_less: + # Also executes as the delay slot of the b above; $t3 is not read + # again on that path. + addiu $t3, $0 , 24 + sub $t3, $t2 + srav $t4, $a0, $t3 +$value_greater: + addi $t4, -64 + sll $t4, 1 + la $t5, sqrt_table + addu $t5, $t4 + lh $t5, 0($t5) + 
nop + sllv $t5, $t5, $t1 + jr $ra + srl $v0, $t5, 12 +$bad_sqr: + jr $ra + move $v0, $0 + + +.section .data + +sqrt_table: + # 192 halfword entries, indexed by (normalised input - 64) with the + # normalised input in 64..255. Entry i follows sqrt((64 + i) / 64) * 0x1000 + # (0x1000 at i = 0). Entries are read with lh, so every value must stay + # below 0x8000 to avoid sign extension. + .hword 0x1000,0x101f,0x103f,0x105e,0x107e,0x109c,0x10bb,0x10da + .hword 0x10f8,0x1116,0x1134,0x1152,0x116f,0x118c,0x11a9,0x11c6 + .hword 0x11e3,0x1200,0x121c,0x1238,0x1254,0x1270,0x128c,0x12a7 + .hword 0x12c2,0x12de,0x12f9,0x1314,0x132e,0x1349,0x1364,0x137e + .hword 0x1398,0x13b2,0x13cc,0x13e6,0x1400,0x1419,0x1432,0x144c + .hword 0x1465,0x147e,0x1497,0x14b0,0x14c8,0x14e1,0x14f9,0x1512 + .hword 0x152a,0x1542,0x155a,0x1572,0x158a,0x15a2,0x15b9,0x15d1 + .hword 0x15e8,0x1600,0x1617,0x162e,0x1645,0x165c,0x1673,0x1689 + .hword 0x16a0,0x16b7,0x16cd,0x16e4,0x16fa,0x1710,0x1726,0x173c + .hword 0x1752,0x1768,0x177e,0x1794,0x17aa,0x17bf,0x17d5,0x17ea + .hword 0x1800,0x1815,0x182a,0x183f,0x1854,0x1869,0x187e,0x1893 + .hword 0x18a8,0x18bd,0x18d1,0x18e6,0x18fa,0x190f,0x1923,0x1938 + .hword 0x194c,0x1960,0x1974,0x1988,0x199c,0x19b0,0x19c4,0x19d8 + .hword 0x19ec,0x1a00,0x1a13,0x1a27,0x1a3a,0x1a4e,0x1a61,0x1a75 + .hword 0x1a88,0x1a9b,0x1aae,0x1ac2,0x1ad5,0x1ae8,0x1afb,0x1b0e + .hword 0x1b21,0x1b33,0x1b46,0x1b59,0x1b6c,0x1b7e,0x1b91,0x1ba3 + .hword 0x1bb6,0x1bc8,0x1bdb,0x1bed,0x1c00,0x1c12,0x1c24,0x1c36 + .hword 0x1c48,0x1c5a,0x1c6c,0x1c7e,0x1c90,0x1ca2,0x1cb4,0x1cc6 + .hword 0x1cd8,0x1ce9,0x1cfb,0x1d0d,0x1d1e,0x1d30,0x1d41,0x1d53 + .hword 0x1d64,0x1d76,0x1d87,0x1d98,0x1daa,0x1dbb,0x1dcc,0x1ddd + .hword 0x1dee,0x1e00,0x1e11,0x1e22,0x1e33,0x1e43,0x1e54,0x1e65 + .hword 0x1e76,0x1e87,0x1e98,0x1ea8,0x1eb9,0x1eca,0x1eda,0x1eeb + .hword 0x1efb,0x1f0c,0x1f1c,0x1f2d,0x1f3d,0x1f4e,0x1f5e,0x1f6e + .hword 0x1f7e,0x1f8f,0x1f9f,0x1faf,0x1fbf,0x1fcf,0x1fdf,0x1fef diff --git a/libmeidogte/vectornormals.s b/libmeidogte/vectornormals.s new file mode 100644 index 0000000..0dbe1e8 --- /dev/null +++ b/libmeidogte/vectornormals.s @@ -0,0 +1,107 @@ +.set noreorder +.set noat + +.include "gtereg.h" +.include "inline_s.h" + +.section .text + + +.global VectorNormalS +.type 
VectorNormalS, @function +VectorNormalS: + # a0 - Pointer to input vector (three words at 0/4/8) + # a1 - Pointer to output vector (three halfwords at 0/2/4) + # Squares the components with nSQR(0), sums MAC1..MAC3 into v0 and feeds + # the sum through LZCS/LZCR. The count read back (low bit cleared, $v1) + # selects both the final right shift ($t6 = (31 - $v1) >> 1) and how v0 is + # shifted so that (value - 64) indexes the halfword _norm_table. The + # input components are then reloaded into IR1..IR3 with the table entry + # in IR0, nGPF(0) is issued (macro from inline_s.h), and MAC1..MAC3 are + # shifted right by $t6 and stored. Uses $at as scratch. + # NOTE(review): unlike SquareRoot0/SquareRoot12 in squareroot.s there is + # no check for LZCR == 32 before indexing _norm_table - confirm the + # behaviour for an all-zero input. + lw $t0, 0($a0) + lw $t1, 4($a0) + lw $t2, 8($a0) + + mtc2 $t0, C2_IR1 + mtc2 $t1, C2_IR2 + mtc2 $t2, C2_IR3 + + nSQR(0) + + mfc2 $t3, C2_MAC1 + mfc2 $t4, C2_MAC2 + mfc2 $t5, C2_MAC3 + + add $t3, $t4 + add $v0, $t3, $t5 + mtc2 $v0, C2_LZCS + nop + nop + mfc2 $v1, C2_LZCR + + addiu $at, $0 , -2 + and $v1, $at + + addiu $t6, $0 , 0x1f + sub $t6, $v1 + sra $t6, 1 + addiu $t3, $v1, -24 + + bltz $t3, $value_neg + nop + b $value_pos + sllv $t4, $v0, $t3 # executes in the delay slot of the b above +$value_neg: + addiu $t3, $0 , 24 + sub $t3, $v1 + srav $t4, $v0, $t3 +$value_pos: + addi $t4, -64 + sll $t4, 1 + + la $t5, _norm_table + addu $t5, $t4 + lh $t5, 0($t5) + nop + + mtc2 $t5, C2_IR0 + mtc2 $t0, C2_IR1 + mtc2 $t1, C2_IR2 + mtc2 $t2, C2_IR3 + + nGPF(0) + + mfc2 $t0, C2_MAC1 + mfc2 $t1, C2_MAC2 + mfc2 $t2, C2_MAC3 + + sra $t0, $t6 + sra $t1, $t6 + sra $t2, $t6 + + sh $t0, 0($a1) + sh $t1, 2($a1) + jr $ra + sh $t2, 4($a1) + + +.section .data + +_norm_table: + # 192 halfword entries indexed by (normalised sum - 64); the values + # follow 0x1000 / sqrt((64 + i) / 64), from 0x1000 down to 0x0804. + .hword 0x1000, 0x0FE0, 0x0FC1, 0x0FA3, 0x0F85, 0x0F68, 0x0F4C, 0x0F30 + .hword 0x0F15, 0x0EFB, 0x0EE1, 0x0EC7, 0x0EAE, 0x0E96, 0x0E7E, 0x0E66 + .hword 0x0E4F, 0x0E38, 0x0E22, 0x0E0C, 0x0DF7, 0x0DE2, 0x0DCD, 0x0DB9 + .hword 0x0DA5, 0x0D91, 0x0D7E, 0x0D6B, 0x0D58, 0x0D45, 0x0D33, 0x0D21 + .hword 0x0D10, 0x0CFF, 0x0CEE, 0x0CDD, 0x0CCC, 0x0CBC, 0x0CAC, 0x0C9C + .hword 0x0C8D, 0x0C7D, 0x0C6E, 0x0C5F, 0x0C51, 0x0C42, 0x0C34, 0x0C26 + .hword 0x0C18, 0x0C0A, 0x0BFD, 0x0BEF, 0x0BE2, 0x0BD5, 0x0BC8, 0x0BBB + .hword 0x0BAF, 0x0BA2, 0x0B96, 0x0B8A, 0x0B7E, 0x0B72, 0x0B67, 0x0B5B + .hword 0x0B50, 0x0B45, 0x0B39, 0x0B2E, 0x0B24, 0x0B19, 0x0B0E, 0x0B04 + .hword 0x0AF9, 0x0AEF, 0x0AE5, 0x0ADB, 0x0AD1, 0x0AC7, 0x0ABD, 0x0AB4 + .hword 0x0AAA, 0x0AA1, 0x0A97, 0x0A8E, 0x0A85, 0x0A7C, 0x0A73, 0x0A6A + .hword 0x0A61, 0x0A59, 0x0A50, 0x0A47, 0x0A3F, 0x0A37, 0x0A2E, 0x0A26 + .hword 0x0A1E, 0x0A16, 0x0A0E, 0x0A06, 0x09FE, 0x09F6, 0x09EF, 0x09E7 + .hword 0x09E0, 0x09D8, 0x09D1, 0x09C9, 0x09C2, 0x09BB, 0x09B4, 0x09AD + .hword 0x09A5, 0x099E, 
0x0998, 0x0991, 0x098A, 0x0983, 0x097C, 0x0976 + .hword 0x096F, 0x0969, 0x0962, 0x095C, 0x0955, 0x094F, 0x0949, 0x0943 + .hword 0x093C, 0x0936, 0x0930, 0x092A, 0x0924, 0x091E, 0x0918, 0x0912 + .hword 0x090D, 0x0907, 0x0901, 0x08FB, 0x08F6, 0x08F0, 0x08EB, 0x08E5 + .hword 0x08E0, 0x08DA, 0x08D5, 0x08CF, 0x08CA, 0x08C5, 0x08BF, 0x08BA + .hword 0x08B5, 0x08B0, 0x08AB, 0x08A6, 0x08A1, 0x089C, 0x0897, 0x0892 + .hword 0x088D, 0x0888, 0x0883, 0x087E, 0x087A, 0x0875, 0x0870, 0x086B + .hword 0x0867, 0x0862, 0x085E, 0x0859, 0x0855, 0x0850, 0x084C, 0x0847 + .hword 0x0843, 0x083E, 0x083A, 0x0836, 0x0831, 0x082D, 0x0829, 0x0824 + .hword 0x0820, 0x081C, 0x0818, 0x0814, 0x0810, 0x080C, 0x0808, 0x0804 + |
