diff options
| author | iCatButler <i.am.catbutler@gmail.com> | 2016-07-30 17:43:12 +0100 |
|---|---|---|
| committer | iCatButler <i.am.catbutler@gmail.com> | 2016-07-30 17:43:12 +0100 |
| commit | 69f33a4782857bf2027db6c81f670409bed76b43 (patch) | |
| tree | ca5ac7afe837748e8c1db89c2519eabf367df4ea /libpcsxcore/pgxp_cpu.c | |
| parent | d5b40fbbe0eee90573ec1848ac135962fba9438e (diff) | |
| download | pcsxr-69f33a4782857bf2027db6c81f670409bed76b43.tar.gz | |
Various CPU updates
- Sign extend values read using LH
- Add conversion functions to represent Signed/Unsigned 16-bit ranges
- Add overflow and truncation functions for 16-bit ranges
- Sign extend imm value in ADD(U)
- Add component overflow and truncation to ADD/SUB functions
- Construct new value in logic operators where result using inputs is undefined
- Return a valid W component from logic operators (if either input has one)
- Compare against high value (y), then low value (x) in less than operators
- Use doubles and implement overflow for Multiply operations to try to increase accuracy
- Use unsigned values in both MUL and DIV operations to make output as accurate as possible
- Implement several variants of shift operators. Trying both arithmetically correct and more analytical approaches with varying success.
Debug updates
- Added ability to force all values to be equal to low precision values before operating on them
- Added feature to test output of operations against a tolerance and print only those which fail
GPU updates
- Colour vertices with valid XY coordinates but no W as cyan to make them easier to spot
- Remove cyan colouring for stale vertices (wasn't useful)
- Added ability to skip debug rendering when needed (like seeing the output of offscreen rendering applied to a sprite).
- Added new mode which shows primitive type
Diffstat (limited to 'libpcsxcore/pgxp_cpu.c')
| -rw-r--r-- | libpcsxcore/pgxp_cpu.c | 840 |
1 files changed, 717 insertions, 123 deletions
diff --git a/libpcsxcore/pgxp_cpu.c b/libpcsxcore/pgxp_cpu.c index 7f15bd81..2b36da57 100644 --- a/libpcsxcore/pgxp_cpu.c +++ b/libpcsxcore/pgxp_cpu.c @@ -3,6 +3,8 @@ #include "pgxp_value.h" #include "pgxp_mem.h" +#include "pgxp_debug.h" + // CPU registers PGXP_value CPU_reg_mem[34]; //PGXP_value CPU_Hi, CPU_Lo; @@ -83,108 +85,155 @@ void PGXP_CPU_ADDI(u32 instr, u32 rtVal, u32 rsVal) { // Rt = Rs + Imm (signed) psx_value tempImm; - + PGXP_value ret; + Validate(&CPU_reg[rs(instr)], rsVal); - CPU_reg[rt(instr)] = CPU_reg[rs(instr)]; + ret = CPU_reg[rs(instr)]; + tempImm.d = imm(instr); + tempImm.sd = (tempImm.sd << 16) >> 16; // sign extend - tempImm.w.h = imm(instr); - CPU_reg[rt(instr)].x += tempImm.sw.h; - // handle x overflow in to y? + ret.x = f16Unsign(ret.x); + ret.x += tempImm.w.l; + + // carry on over/underflow + float of = (ret.x > USHRT_MAX) ? 1.f : (ret.x < 0) ? -1.f : 0.f; + ret.x = f16Sign(ret.x); + //ret.x -= of * (USHRT_MAX + 1); + ret.y += tempImm.sw.h + of; + + // truncate on overflow/underflow + ret.y += (ret.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (ret.y < SHRT_MIN) ? USHRT_MAX + 1 : 0.f; + CPU_reg[rt(instr)] = ret; CPU_reg[rt(instr)].value = rtVal; } void PGXP_CPU_ADDIU(u32 instr, u32 rtVal, u32 rsVal) { // Rt = Rs + Imm (signed) (unsafe?) - psx_value tempImm; - - Validate(&CPU_reg[rs(instr)], rsVal); - CPU_reg[rt(instr)] = CPU_reg[rs(instr)]; - - tempImm.w.h = imm(instr); - CPU_reg[rt(instr)].x += tempImm.sw.h; - // handle x overflow in to y? - - CPU_reg[rt(instr)].value = rtVal; + PGXP_CPU_ADDI(instr, rtVal, rsVal); } void PGXP_CPU_ANDI(u32 instr, u32 rtVal, u32 rsVal) { // Rt = Rs & Imm + psx_value vRt; + PGXP_value ret; + Validate(&CPU_reg[rs(instr)], rsVal); - CPU_reg[rt(instr)] = CPU_reg[rs(instr)]; + ret = CPU_reg[rs(instr)]; + + vRt.d = rtVal; - CPU_reg[rt(instr)].y = 0.f; // remove upper 16-bits + ret.y = 0.f; // remove upper 16-bits switch (imm(instr)) { case 0: // if 0 then x == 0 - CPU_reg[rt(instr)].x = 0.f; + ret.x = 0.f; break; case 0xFFFF: - // if saturated then x = x + // if saturated then x == x break; default: - // x is undefined, invalidate value - CPU_reg[rt(instr)].flags = 0; + // otherwise x is low precision value + ret.x = vRt.sw.l; + ret.flags |= VALID_0; } + ret.flags |= VALID_1; + + CPU_reg[rt(instr)] = ret; CPU_reg[rt(instr)].value = rtVal; } void PGXP_CPU_ORI(u32 instr, u32 rtVal, u32 rsVal) { // Rt = Rs | Imm + psx_value vRt; + PGXP_value ret; + Validate(&CPU_reg[rs(instr)], rsVal); - CPU_reg[rt(instr)] = CPU_reg[rs(instr)]; + ret = CPU_reg[rs(instr)]; - // Invalidate on non-zero values for now - if (imm(instr) != 0) - CPU_reg[rt(instr)].flags = 0; + vRt.d = rtVal; - CPU_reg[rt(instr)].value = rtVal; + switch (imm(instr)) + { + case 0: + // if 0 then x == x + break; + default: + // otherwise x is low precision value + ret.x = vRt.sw.l; + ret.flags |= VALID_0; + } + + ret.value = rtVal; + CPU_reg[rt(instr)] = ret; } void PGXP_CPU_XORI(u32 instr, u32 rtVal, u32 rsVal) { // Rt = Rs ^ Imm + psx_value vRt; + PGXP_value ret; + Validate(&CPU_reg[rs(instr)], rsVal); - CPU_reg[rt(instr)] = CPU_reg[rs(instr)]; + ret = CPU_reg[rs(instr)]; - // Invalidate on non-zero values for now - if (imm(instr) != 0) - CPU_reg[rt(instr)].flags = 0; + vRt.d = rtVal; - CPU_reg[rt(instr)].value = rtVal; + switch (imm(instr)) + { + case 0: + // if 0 then x == x + break; + default: + // otherwise x is low precision value + ret.x = vRt.sw.l; + ret.flags |= VALID_0; + } + + ret.value = rtVal; + CPU_reg[rt(instr)] = ret; } void PGXP_CPU_SLTI(u32 instr, u32 rtVal, u32 rsVal) { // Rt = Rs < Imm (signed) psx_value tempImm; + PGXP_value ret; Validate(&CPU_reg[rs(instr)], rsVal); - CPU_reg[rt(instr)] = CPU_reg[rs(instr)]; + ret = CPU_reg[rs(instr)]; tempImm.w.h = imm(instr); - CPU_reg[rt(instr)].y = 0.f; - CPU_reg[rt(instr)].x = (CPU_reg[rs(instr)].x < tempImm.sw.h) ? 1.f : 0.f; + ret.y = 0.f; + ret.x = (CPU_reg[rs(instr)].x < tempImm.sw.h) ? 1.f : 0.f; + ret.flags |= VALID_1; + ret.value = rtVal; - CPU_reg[rt(instr)].value = rtVal; + CPU_reg[rt(instr)] = ret; } void PGXP_CPU_SLTIU(u32 instr, u32 rtVal, u32 rsVal) { - // Rt = Rs < Imm (signed) + // Rt = Rs < Imm (Unsigned) + psx_value tempImm; + PGXP_value ret; + Validate(&CPU_reg[rs(instr)], rsVal); - CPU_reg[rt(instr)] = CPU_reg[rs(instr)]; + ret = CPU_reg[rs(instr)]; - CPU_reg[rt(instr)].y = 0.f; - CPU_reg[rt(instr)].x = (fabs(CPU_reg[rs(instr)].x) < (u32)imm(instr)) ? 1.f : 0.f; + tempImm.w.h = imm(instr); + ret.y = 0.f; + ret.x = (f16Unsign(CPU_reg[rs(instr)].x) < tempImm.w.h) ? 1.f : 0.f; + ret.flags |= VALID_1; + ret.value = rtVal; - CPU_reg[rt(instr)].value = rtVal; + CPU_reg[rt(instr)] = ret; } //////////////////////////////////// @@ -197,11 +246,13 @@ void PGXP_CPU_LUI(u32 instr, u32 rtVal) CPU_reg[rt(instr)].y = (float)(s16)imm(instr); CPU_reg[rt(instr)].hFlags = VALID_HALF; CPU_reg[rt(instr)].value = rtVal; + CPU_reg[rt(instr)].flags = VALID_01; } //////////////////////////////////// // Register Arithmetic //////////////////////////////////// + void PGXP_CPU_ADD(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) { // Rd = Rs + Rt (signed) @@ -218,8 +269,19 @@ void PGXP_CPU_ADD(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) ret = CPU_reg[rs(instr)]; - ret.x += CPU_reg[rt(instr)].x; - ret.y += CPU_reg[rt(instr)].y; + ret.x = f16Unsign(ret.x); + ret.x += f16Unsign(CPU_reg[rt(instr)].x); + + // carry on over/underflow + float of = (ret.x > USHRT_MAX) ? 1.f : (ret.x < 0) ? -1.f : 0.f; + ret.x = f16Sign(ret.x); + //ret.x -= of * (USHRT_MAX + 1); + ret.y += CPU_reg[rt(instr)].y + of; + + // truncate on overflow/underflow + ret.y += (ret.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (ret.y < SHRT_MIN) ? USHRT_MAX + 1 : 0.f; + + // TODO: decide which "z/w" component to use ret.halfFlags[0] &= CPU_reg[rt(instr)].halfFlags[0]; ret.gFlags |= CPU_reg[rt(instr)].gFlags; @@ -253,8 +315,17 @@ void PGXP_CPU_SUB(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) ret = CPU_reg[rs(instr)]; - ret.x -= CPU_reg[rt(instr)].x; - ret.y -= CPU_reg[rt(instr)].y; + ret.x = f16Unsign(ret.x); + ret.x -= f16Unsign(CPU_reg[rt(instr)].x); + + // carry on over/underflow + float of = (ret.x > USHRT_MAX) ? 1.f : (ret.x < 0) ? -1.f : 0.f; + ret.x = f16Sign(ret.x); + //ret.x -= of * (USHRT_MAX + 1); + ret.y -= CPU_reg[rt(instr)].y - of; + + // truncate on overflow/underflow + ret.y += (ret.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (ret.y < SHRT_MIN) ? USHRT_MAX + 1 : 0.f; ret.halfFlags[0] &= CPU_reg[rt(instr)].halfFlags[0]; ret.gFlags |= CPU_reg[rt(instr)].gFlags; @@ -348,6 +419,18 @@ void PGXP_CPU_AND(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) // ret.valid = 1; // /iCB Hack + // Get a valid W + if ((CPU_reg[rs(instr)].flags & VALID_2) == VALID_2) + { + ret.z = CPU_reg[rs(instr)].z; + ret.compFlags[2] = CPU_reg[rs(instr)].compFlags[2]; + } + else if((CPU_reg[rt(instr)].flags & VALID_2) == VALID_2) + { + ret.z = CPU_reg[rt(instr)].z; + ret.compFlags[2] = CPU_reg[rt(instr)].compFlags[2]; + } + ret.value = rdVal; CPU_reg[rd(instr)] = ret; } @@ -385,10 +468,10 @@ void PGXP_CPU_SLT(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) } ret = CPU_reg[rs(instr)]; - - // TODO: fix for single or double values? ret.y = 0.f; - ret.x = (CPU_reg[rs(instr)].x < CPU_reg[rt(instr)].x) ? 1.f : 0.f; + ret.compFlags[1] = VALID; + + ret.x = (CPU_reg[rs(instr)].y < CPU_reg[rt(instr)].y) ? 1.f : (f16Unsign(CPU_reg[rs(instr)].x) < f16Unsign(CPU_reg[rt(instr)].x)) ? 1.f : 0.f; ret.value = rdVal; CPU_reg[rd(instr)] = ret; @@ -409,9 +492,10 @@ void PGXP_CPU_SLTU(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) } ret = CPU_reg[rs(instr)]; - ret.y = 0.f; - ret.x = (fabs(CPU_reg[rs(instr)].x) < fabs(CPU_reg[rt(instr)].x)) ? 1.f : 0.f; + ret.compFlags[1] = VALID; + + ret.x = (f16Unsign(CPU_reg[rs(instr)].y) < f16Unsign(CPU_reg[rt(instr)].y)) ? 1.f : (f16Unsign(CPU_reg[rs(instr)].x) < f16Unsign(CPU_reg[rt(instr)].x)) ? 1.f : 0.f; ret.value = rdVal; CPU_reg[rd(instr)] = ret; @@ -420,6 +504,7 @@ void PGXP_CPU_SLTU(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) //////////////////////////////////// // Register mult/div //////////////////////////////////// + void PGXP_CPU_MULT(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal) { // Hi/Lo = Rs * Rt (signed) @@ -433,14 +518,36 @@ void PGXP_CPU_MULT(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal) MakeValid(&CPU_reg[rt(instr)], rtVal); } - float vs = CPU_reg[rs(instr)].y + (CPU_reg[rs(instr)].x / (float)(1 << 16)); - float vt = CPU_reg[rt(instr)].y + (CPU_reg[rt(instr)].x / (float)(1 << 16)); - - CPU_Hi.x = vs * vt;// CPU_reg[rs(instr)].y * CPU_reg[rt(instr)].y; - CPU_Lo.y = (CPU_Hi.x - ((s32)CPU_Hi.x)) * (float)(1 << 16);// CPU_reg[rs(instr)].x * CPU_reg[rt(instr)].x; // Get fractional part + CPU_Lo = CPU_Hi = CPU_reg[rs(instr)]; CPU_Lo.halfFlags[0] = CPU_Hi.halfFlags[0] = (CPU_reg[rs(instr)].halfFlags[0] & CPU_reg[rt(instr)].halfFlags[0]); + double xx, xy, yx, yy; + double lx = 0, ly = 0, hx = 0, hy = 0; + s64 of = 0; + + // Multiply out components + xx = f16Unsign(CPU_reg[rs(instr)].x) * f16Unsign(CPU_reg[rt(instr)].x); + xy = f16Unsign(CPU_reg[rs(instr)].x) * (CPU_reg[rt(instr)].y); + yx = (CPU_reg[rs(instr)].y) * f16Unsign(CPU_reg[rt(instr)].x); + yy = (CPU_reg[rs(instr)].y) * (CPU_reg[rt(instr)].y); + + // Split values into outputs + lx = xx; + + ly = f16Overflow(xx); + ly += xy + yx; + + hx = f16Overflow(ly); + hx += yy; + + hy = f16Overflow(hx); + + CPU_Lo.x = f16Sign(lx); + CPU_Lo.y = f16Sign(ly); + CPU_Hi.x = f16Sign(hx); + CPU_Hi.y = f16Sign(hy); + CPU_Lo.value = loVal; CPU_Hi.value = hiVal; } @@ -458,22 +565,44 @@ void PGXP_CPU_MULTU(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal) MakeValid(&CPU_reg[rt(instr)], rtVal); } - float vs = fabs(CPU_reg[rs(instr)].y) + (fabs(CPU_reg[rs(instr)].x) / (float)(1 << 16)); - float vt = fabs(CPU_reg[rt(instr)].y) + (fabs(CPU_reg[rt(instr)].x) / (float)(1 << 16)); - - CPU_Hi.x = vs * vt;// fabs(CPU_reg[rs(instr)].y) * fabs(CPU_reg[rt(instr)].y); - CPU_Lo.y = (CPU_Hi.x - ((s32)CPU_Hi.x)) * (float)(1 << 16);// fabs(CPU_reg[rs(instr)].x) * fabs(CPU_reg[rt(instr)].x); // Get fractional part + CPU_Lo = CPU_Hi = CPU_reg[rs(instr)]; CPU_Lo.halfFlags[0] = CPU_Hi.halfFlags[0] = (CPU_reg[rs(instr)].halfFlags[0] & CPU_reg[rt(instr)].halfFlags[0]); + double xx, xy, yx, yy; + double lx = 0, ly = 0, hx = 0, hy = 0; + s64 of = 0; + + // Multiply out components + xx = f16Unsign(CPU_reg[rs(instr)].x) * f16Unsign(CPU_reg[rt(instr)].x); + xy = f16Unsign(CPU_reg[rs(instr)].x) * f16Unsign(CPU_reg[rt(instr)].y); + yx = f16Unsign(CPU_reg[rs(instr)].y) * f16Unsign(CPU_reg[rt(instr)].x); + yy = f16Unsign(CPU_reg[rs(instr)].y) * f16Unsign(CPU_reg[rt(instr)].y); + + // Split values into outputs + lx = xx; + + ly = f16Overflow(xx); + ly += xy + yx; + + hx = f16Overflow(ly); + hx += yy; + + hy = f16Overflow(hx); + + CPU_Lo.x = f16Sign(lx); + CPU_Lo.y = f16Sign(ly); + CPU_Hi.x = f16Sign(hx); + CPU_Hi.y = f16Sign(hy); + CPU_Lo.value = loVal; CPU_Hi.value = hiVal; } void PGXP_CPU_DIV(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal) { - // Hi = Rs / Rt (signed) - // Lo = Rs % Rt (signed) + // Lo = Rs / Rt (signed) + // Hi = Rs % Rt (signed) Validate(&CPU_reg[rs(instr)], rsVal); Validate(&CPU_reg[rt(instr)], rtVal); @@ -484,23 +613,29 @@ void PGXP_CPU_DIV(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal) MakeValid(&CPU_reg[rt(instr)], rtVal); } - float vs = CPU_reg[rs(instr)].y + (CPU_reg[rs(instr)].x / (float)(1 << 16)); - float vt = CPU_reg[rt(instr)].y + (CPU_reg[rt(instr)].x / (float)(1 << 16)); - - CPU_Lo.x = vs / vt; - CPU_Hi.x = fmod(vs, vt); - CPU_Lo.x -= CPU_Hi.x; + CPU_Lo = CPU_Hi = CPU_reg[rs(instr)]; CPU_Lo.halfFlags[0] = CPU_Hi.halfFlags[0] = (CPU_reg[rs(instr)].halfFlags[0] & CPU_reg[rt(instr)].halfFlags[0]); + double vs = f16Unsign(CPU_reg[rs(instr)].x) + (CPU_reg[rs(instr)].y) * (double)(1 << 16); + double vt = f16Unsign(CPU_reg[rt(instr)].x) + (CPU_reg[rt(instr)].y) * (double)(1 << 16); + + double lo = vs / vt; + CPU_Lo.y = f16Sign(f16Overflow(lo)); + CPU_Lo.x = f16Sign(lo); + + double hi = fmod(vs, vt); + CPU_Hi.y = f16Sign(f16Overflow(hi)); + CPU_Hi.x = f16Sign(hi); + CPU_Lo.value = loVal; CPU_Hi.value = hiVal; } void PGXP_CPU_DIVU(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal) { - // Hi = Rs / Rt (unsigned) - // Lo = Rs % Rt (unsigned) + // Lo = Rs / Rt (unsigned) + // Hi = Rs % Rt (unsigned) Validate(&CPU_reg[rs(instr)], rsVal); Validate(&CPU_reg[rt(instr)], rtVal); @@ -511,15 +646,21 @@ void PGXP_CPU_DIVU(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal) MakeValid(&CPU_reg[rt(instr)], rtVal); } - float vs = CPU_reg[rs(instr)].y + (CPU_reg[rs(instr)].x / (float)(1 << 16)); - float vt = CPU_reg[rt(instr)].y + (CPU_reg[rt(instr)].x / (float)(1 << 16)); - - CPU_Lo.x = fabs(vs) / fabs(vt); - CPU_Hi.x = fmod(fabs(vs), fabs(vt)); - CPU_Lo.x -= CPU_Hi.x; + CPU_Lo = CPU_Hi = CPU_reg[rs(instr)]; CPU_Lo.halfFlags[0] = CPU_Hi.halfFlags[0] = (CPU_reg[rs(instr)].halfFlags[0] & CPU_reg[rt(instr)].halfFlags[0]); + double vs = f16Unsign(CPU_reg[rs(instr)].x) + f16Unsign(CPU_reg[rs(instr)].y) * (double)(1 << 16); + double vt = f16Unsign(CPU_reg[rt(instr)].x) + f16Unsign(CPU_reg[rt(instr)].y) * (double)(1 << 16); + + double lo = vs / vt; + CPU_Lo.y = f16Sign(f16Overflow(lo)); + CPU_Lo.x = f16Sign(lo); + + double hi = fmod(vs, vt); + CPU_Hi.y = f16Sign(f16Overflow(hi)); + CPU_Hi.x = f16Sign(hi); + CPU_Lo.value = loVal; CPU_Hi.value = hiVal; } @@ -533,21 +674,89 @@ void PGXP_CPU_SLL(u32 instr, u32 rdVal, u32 rtVal) PGXP_value ret; u32 sh = sa(instr); Validate(&CPU_reg[rt(instr)], rtVal); + ret = CPU_reg[rt(instr)]; - // Shift y into x? - if (sh >= 16) + // TODO: Shift flags +#if 1 + double x = f16Unsign(CPU_reg[rt(instr)].x); + double y = f16Unsign(CPU_reg[rt(instr)].y); + if (sh >= 32) { - ret.y = ret.x; - ret.x = 0; - ret.hFlags = ret.lFlags; - ret.lFlags = 0; - sh -= 16; + x = 0.f; + y = 0.f; } - - // assume multiply with no overflow - ret.x *= (float)(1 << sh); - ret.y *= (float)(1 << sh); + else if (sh == 16) + { + y = f16Sign(x); + x = 0.f; + } + else if (sh >= 16) + { + y = x * (1 << (sh - 16)); + y = f16Sign(y); + x = 0.f; + } + else + { + x = x * (1 << sh); + y = y * (1 << sh); + y += f16Overflow(x); + x = f16Sign(x); + y = f16Sign(y); + } +#else + double x = CPU_reg[rt(instr)].x, y = f16Unsign(CPU_reg[rt(instr)].y); + + psx_value iX; iX.d = rtVal; + psx_value iY; iY.d = rtVal; + + iX.w.h = 0; // remove Y + iY.w.l = 0; // remove X + + // Shift test values + psx_value dX; + dX.d = iX.d << sh; + psx_value dY; + dY.d = iY.d << sh; + + + if ((dY.sw.h == 0) || (dY.sw.h == -1)) + y = dY.sw.h; + else + y = y * (1 << sh); + + if (dX.sw.h != 0.f) + { + if (sh == 16) + { + y = x; + } + else if (sh < 16) + { + y += f16Unsign(x) / (1 << (16 - sh)); + //if (in.x < 0) + // y += 1 << (16 - sh); + } + else + { + y += x * (1 << (sh - 16)); + } + } + + // if there's anything left of X write it in + if (dX.w.l != 0.f) + x = x * (1 << sh); + else + x = 0; + + x = f16Sign(x); + y = f16Sign(y); + +#endif + + ret.x = x; + ret.y = y; ret.value = rdVal; CPU_reg[rd(instr)] = ret; @@ -559,21 +768,90 @@ void PGXP_CPU_SRL(u32 instr, u32 rdVal, u32 rtVal) PGXP_value ret; u32 sh = sa(instr); Validate(&CPU_reg[rt(instr)], rtVal); + ret = CPU_reg[rt(instr)]; - // Shift x into y? - if (sh >= 16) +#if 0 + double x = f16Unsign(CPU_reg[rt(instr)].x); + double y = f16Unsign(CPU_reg[rt(instr)].y); + if (sh >= 32) { - ret.x = ret.y; - ret.y = 0; - ret.lFlags = ret.hFlags; - ret.hFlags = 0; - sh -= 16; + x = y = 0.f; } - - // assume divide with no overflow - ret.x /= (float)(1 << sh); - ret.y /= (float)(1 << sh); + else if (sh >= 16) + { + x = y / (1 << (sh - 16)); + x = f16Sign(x); + y = (y < 0) ? -1.f : 0.f; // sign extend + } + else + { + x = x / (1 << sh); + + // check for potential sign extension in overflow + psx_value valt; + valt.d = rtVal; + u16 mask = 0xFFFF >> (16 - sh); + if ((valt.w.h & mask) == mask) + x += mask << (16 - sh); + else if ((valt.w.h & mask) == 0) + x = x; + else + x += y * (1 << (16 - sh));//f16Overflow(y); + + y = y / (1 << sh); + x = f16Sign(x); + y = f16Sign(y); + } +#else + double x = CPU_reg[rt(instr)].x, y = f16Unsign(CPU_reg[rt(instr)].y); + + psx_value iX; iX.d = rtVal; + psx_value iY; iY.d = rtVal; + + iX.sd = (iX.sd << 16) >> 16; // remove Y + iY.sw.l = iX.sw.h; // overwrite x with sign(x) + + // Shift test values + psx_value dX; + dX.sd = iX.sd >> sh; + psx_value dY; + dY.d = iY.d >> sh; + + if (dX.sw.l != iX.sw.h) + x = x / (1 << sh); + else + x = dX.sw.l; // only sign bits left + + if (dY.sw.l != iX.sw.h) + { + if (sh == 16) + { + x = y; + } + else if (sh < 16) + { + x += y * (1 << (16 - sh)); + if (CPU_reg[rt(instr)].x < 0) + x += 1 << (16 - sh); + } + else + { + x += y / (1 << (sh - 16)); + } + } + + if ((dY.sw.h == 0) || (dY.sw.h == -1)) + y = dY.sw.h; + else + y = y / (1 << sh); + + x = f16Sign(x); + y = f16Sign(y); + +#endif + ret.x = x; + ret.y = y; ret.value = rdVal; CPU_reg[rd(instr)] = ret; @@ -582,7 +860,98 @@ void PGXP_CPU_SRL(u32 instr, u32 rdVal, u32 rtVal) void PGXP_CPU_SRA(u32 instr, u32 rdVal, u32 rtVal) { // Rd = Rt >> Sa - PGXP_CPU_SRL(instr, rdVal, rtVal); + PGXP_value ret; + u32 sh = sa(instr); + Validate(&CPU_reg[rt(instr)], rtVal); + ret = CPU_reg[rt(instr)]; + +#if 0 + double x = f16Unsign(CPU_reg[rt(instr)].x); + double y = (CPU_reg[rt(instr)].y); + if (sh >= 32) + { + // sign extend + x = y = (y < 0) ? -1.f : 0.f; + } + else if (sh >= 16) + { + x = y / (1 << (sh - 16)); + x = f16Sign(x); + y = (y < 0) ? -1.f : 0.f; // sign extend + } + else + { + x = x / (1 << sh); + + // check for potential sign extension in overflow + psx_value valt; + valt.d = rtVal; + u16 mask = 0xFFFF >> (16 - sh); + if ((valt.w.h & mask) == mask) + x += mask << (16 - sh); + else if ((valt.w.h & mask) == 0) + x = x; + else + x += y * (1 << (16 - sh));//f16Overflow(y); + + y = y / (1 << sh); + x = f16Sign(x); + y = f16Sign(y); + } + +#else + double x = CPU_reg[rt(instr)].x, y = CPU_reg[rt(instr)].y; + + psx_value iX; iX.d = rtVal; + psx_value iY; iY.d = rtVal; + + iX.sd = (iX.sd << 16) >> 16; // remove Y + iY.sw.l = iX.sw.h; // overwrite x with sign(x) + + // Shift test values + psx_value dX; + dX.sd = iX.sd >> sh; + psx_value dY; + dY.sd = iY.sd >> sh; + + if (dX.sw.l != iX.sw.h) + x = x / (1 << sh); + else + x = dX.sw.l; // only sign bits left + + if (dY.sw.l != iX.sw.h) + { + if (sh == 16) + { + x = y; + } + else if (sh < 16) + { + x += y * (1 << (16 - sh)); + if (CPU_reg[rt(instr)].x < 0) + x += 1 << (16 - sh); + } + else + { + x += y / (1 << (sh - 16)); + } + } + + if ((dY.sw.h == 0) || (dY.sw.h == -1)) + y = dY.sw.h; + else + y = y / (1 << sh); + + x = f16Sign(x); + y = f16Sign(y); + +#endif + + ret.x = x; + ret.y = y; + + ret.value = rdVal; + CPU_reg[rd(instr)] = ret; } //////////////////////////////////// @@ -598,19 +967,84 @@ void PGXP_CPU_SLLV(u32 instr, u32 rdVal, u32 rtVal, u32 rsVal) ret = CPU_reg[rt(instr)]; - // Shift y into x? - if (sh >= 16) +#if 1 + double x = f16Unsign(CPU_reg[rt(instr)].x); + double y = f16Unsign(CPU_reg[rt(instr)].y); + if (sh >= 32) { - ret.y = ret.x; - ret.x = 0; - ret.hFlags = ret.lFlags; - ret.lFlags = 0; - sh -= 16; + x = 0.f; + y = 0.f; + } + else if (sh == 16) + { + y = f16Sign(x); + x = 0.f; + } + else if (sh >= 16) + { + y = x * (1 << (sh - 16)); + y = f16Sign(y); + x = 0.f; } + else + { + x = x * (1 << sh); + y = y * (1 << sh); + y += f16Overflow(x); + x = f16Sign(x); + y = f16Sign(y); + } +#else + double x = CPU_reg[rt(instr)].x, y = f16Unsign(CPU_reg[rt(instr)].y); + + psx_value iX; iX.d = rtVal; + psx_value iY; iY.d = rtVal; - // assume multiply with no overflow - ret.x *= (float)(1 << sh); - ret.y *= (float)(1 << sh); + iX.w.h = 0; // remove Y + iY.w.l = 0; // remove X + + // Shift test values + psx_value dX; + dX.d = iX.d << sh; + psx_value dY; + dY.d = iY.d << sh; + + + if ((dY.sw.h == 0) || (dY.sw.h == -1)) + y = dY.sw.h; + else + y = y * (1 << sh); + + if (dX.sw.h != 0.f) + { + if (sh == 16) + { + y = x; + } + else if (sh < 16) + { + y += f16Unsign(x) / (1 << (16 - sh)); + //if (in.x < 0) + // y += 1 << (16 - sh); + } + else + { + y += x * (1 << (sh - 16)); + } + } + + // if there's anything left of X write it in + if (dX.w.l != 0.f) + x = x * (1 << sh); + else + x = 0; + + x = f16Sign(x); + y = f16Sign(y); + +#endif + ret.x = x; + ret.y = y; ret.value = rdVal; CPU_reg[rd(instr)] = ret; @@ -626,19 +1060,89 @@ void PGXP_CPU_SRLV(u32 instr, u32 rdVal, u32 rtVal, u32 rsVal) ret = CPU_reg[rt(instr)]; - // Shift x into y? - if (sh >= 16) +#if 0 + double x = f16Unsign(CPU_reg[rt(instr)].x); + double y = f16Unsign(CPU_reg[rt(instr)].y); + if (sh >= 32) { - ret.x = ret.y; - ret.y = 0; - ret.lFlags = ret.hFlags; - ret.hFlags = 0; - sh -= 16; + x = y = 0.f; + } + else if (sh >= 16) + { + x = y / (1 << (sh - 16)); + x = f16Sign(x); + y = (y < 0) ? -1.f : 0.f; // sign extend } + else + { + x = x / (1 << sh); + + // check for potential sign extension in overflow + psx_value valt; + valt.d = rtVal; + u16 mask = 0xFFFF >> (16 - sh); + if ((valt.w.h & mask) == mask) + x += mask << (16 - sh); + else if ((valt.w.h & mask) == 0) + x = x; + else + x += y * (1 << (16 - sh));//f16Overflow(y); + + y = y / (1 << sh); + x = f16Sign(x); + y = f16Sign(y); + } + +#else + double x = CPU_reg[rt(instr)].x, y = f16Unsign(CPU_reg[rt(instr)].y); - // assume divide with no overflow - ret.x /= (float)(1 << sh); - ret.y /= (float)(1 << sh); + psx_value iX; iX.d = rtVal; + psx_value iY; iY.d = rtVal; + + iX.sd = (iX.sd << 16) >> 16; // remove Y + iY.sw.l = iX.sw.h; // overwrite x with sign(x) + + // Shift test values + psx_value dX; + dX.sd = iX.sd >> sh; + psx_value dY; + dY.d = iY.d >> sh; + + if (dX.sw.l != iX.sw.h) + x = x / (1 << sh); + else + x = dX.sw.l; // only sign bits left + + if (dY.sw.l != iX.sw.h) + { + if (sh == 16) + { + x = y; + } + else if (sh < 16) + { + x += y * (1 << (16 - sh)); + if (CPU_reg[rt(instr)].x < 0) + x += 1 << (16 - sh); + } + else + { + x += y / (1 << (sh - 16)); + } + } + + if ((dY.sw.h == 0) || (dY.sw.h == -1)) + y = dY.sw.h; + else + y = y / (1 << sh); + + x = f16Sign(x); + y = f16Sign(y); + +#endif + + ret.x = x; + ret.y = y; ret.value = rdVal; CPU_reg[rd(instr)] = ret; @@ -646,7 +1150,99 @@ void PGXP_CPU_SRLV(u32 instr, u32 rdVal, u32 rtVal, u32 rsVal) void PGXP_CPU_SRAV(u32 instr, u32 rdVal, u32 rtVal, u32 rsVal) { - PGXP_CPU_SRLV(instr, rdVal, rtVal, rsVal); + // Rd = Rt >> Sa + PGXP_value ret; + u32 sh = rsVal & 0x1F; + Validate(&CPU_reg[rt(instr)], rtVal); + Validate(&CPU_reg[rs(instr)], rsVal); + + ret = CPU_reg[rt(instr)]; +#if 0 + double x = f16Unsign(CPU_reg[rt(instr)].x); + double y = f16Unsign(CPU_reg[rt(instr)].y); + if (sh >= 32) + { + x = y = 0.f; + } + else if (sh >= 16) + { + x = y / (1 << (sh - 16)); + x = f16Sign(x); + y = (y < 0) ? -1.f : 0.f; // sign extend + } + else + { + x = x / (1 << sh); + + // check for potential sign extension in overflow + psx_value valt; + valt.d = rtVal; + u16 mask = 0xFFFF >> (16 - sh); + if ((valt.w.h & mask) == mask) + x += mask << (16 - sh); + else if ((valt.w.h & mask) == 0) + x = x; + else + x += y * (1 << (16 - sh));//f16Overflow(y); + + y = y / (1 << sh); + x = f16Sign(x); + y = f16Sign(y); + } + +#else + double x = CPU_reg[rt(instr)].x, y = CPU_reg[rt(instr)].y; + + psx_value iX; iX.d = rtVal; + psx_value iY; iY.d = rtVal; + + iX.sd = (iX.sd << 16) >> 16; // remove Y + iY.sw.l = iX.sw.h; // overwrite x with sign(x) + + // Shift test values + psx_value dX; + dX.sd = iX.sd >> sh; + psx_value dY; + dY.sd = iY.sd >> sh; + + if (dX.sw.l != iX.sw.h) + x = x / (1 << sh); + else + x = dX.sw.l; // only sign bits left + + if (dY.sw.l != iX.sw.h) + { + if (sh == 16) + { + x = y; + } + else if (sh < 16) + { + x += y * (1 << (16 - sh)); + if (CPU_reg[rt(instr)].x < 0) + x += 1 << (16 - sh); + } + else + { + x += y / (1 << (sh - 16)); + } + } + + if ((dY.sw.h == 0) || (dY.sw.h == -1)) + y = dY.sw.h; + else + y = y / (1 << sh); + + x = f16Sign(x); + y = f16Sign(y); + +#endif + + ret.x = x; + ret.y = y; + + ret.value = rdVal; + CPU_reg[rd(instr)] = ret; } //////////////////////////////////// @@ -699,7 +1295,6 @@ void PGXP_CPU_LW(u32 instr, u32 rtVal, u32 addr) { // Rt = Mem[Rs + Im] ValidateAndCopyMem(&CPU_reg[rt(instr)], addr, rtVal); - //CPU_reg[rt(instr)] = PGXP_validateXY(ReadMem(addr), rtVal); } void PGXP_CPU_LWR(u32 instr, u32 rtVal, u32 addr) @@ -714,7 +1309,7 @@ void PGXP_CPU_LH(u32 instr, u16 rtVal, u32 addr) // Rt = Mem[Rs + Im] (sign extended) psx_value val; val.sd = (s32)(s16)rtVal; - ValidateAndCopyMem16(&CPU_reg[rt(instr)], addr, val.d); + ValidateAndCopyMem16(&CPU_reg[rt(instr)], addr, val.d, 1); } void PGXP_CPU_LHU(u32 instr, u16 rtVal, u32 addr) @@ -723,7 +1318,7 @@ void PGXP_CPU_LHU(u32 instr, u16 rtVal, u32 addr) psx_value val; val.d = rtVal; val.w.h = 0; - ValidateAndCopyMem16(&CPU_reg[rt(instr)], addr, val.d); + ValidateAndCopyMem16(&CPU_reg[rt(instr)], addr, val.d, 0); } // Load 8-bit @@ -749,7 +1344,6 @@ void PGXP_CPU_SW(u32 instr, u32 rtVal, u32 addr) // Mem[Rs + Im] = Rt Validate(&CPU_reg[rt(instr)], rtVal); WriteMem(&CPU_reg[rt(instr)], addr); - //WriteMemOld(PGXP_validateXY(&CPU_reg[rt(instr)], rtVal), addr); } void PGXP_CPU_SWR(u32 instr, u32 rtVal, u32 addr) |
