diff --git a/libpcsxcore/pgxp_cpu.c b/libpcsxcore/pgxp_cpu.c index 7f15bd81..2b36da57 100644 --- a/libpcsxcore/pgxp_cpu.c +++ b/libpcsxcore/pgxp_cpu.c @@ -3,6 +3,8 @@ #include "pgxp_value.h" #include "pgxp_mem.h" +#include "pgxp_debug.h" + // CPU registers PGXP_value CPU_reg_mem[34]; //PGXP_value CPU_Hi, CPU_Lo; @@ -83,108 +85,155 @@ void PGXP_CPU_ADDI(u32 instr, u32 rtVal, u32 rsVal) { // Rt = Rs + Imm (signed) psx_value tempImm; - + PGXP_value ret; + Validate(&CPU_reg[rs(instr)], rsVal); - CPU_reg[rt(instr)] = CPU_reg[rs(instr)]; + ret = CPU_reg[rs(instr)]; + tempImm.d = imm(instr); + tempImm.sd = (tempImm.sd << 16) >> 16; // sign extend - tempImm.w.h = imm(instr); - CPU_reg[rt(instr)].x += tempImm.sw.h; - // handle x overflow in to y? + ret.x = f16Unsign(ret.x); + ret.x += tempImm.w.l; + // carry on over/underflow + float of = (ret.x > USHRT_MAX) ? 1.f : (ret.x < 0) ? -1.f : 0.f; + ret.x = f16Sign(ret.x); + //ret.x -= of * (USHRT_MAX + 1); + ret.y += tempImm.sw.h + of; + + // truncate on overflow/underflow + ret.y += (ret.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (ret.y < SHRT_MIN) ? USHRT_MAX + 1 : 0.f; + + CPU_reg[rt(instr)] = ret; CPU_reg[rt(instr)].value = rtVal; } void PGXP_CPU_ADDIU(u32 instr, u32 rtVal, u32 rsVal) { // Rt = Rs + Imm (signed) (unsafe?) - psx_value tempImm; - - Validate(&CPU_reg[rs(instr)], rsVal); - CPU_reg[rt(instr)] = CPU_reg[rs(instr)]; - - tempImm.w.h = imm(instr); - CPU_reg[rt(instr)].x += tempImm.sw.h; - // handle x overflow in to y? 
- - CPU_reg[rt(instr)].value = rtVal; + PGXP_CPU_ADDI(instr, rtVal, rsVal); } void PGXP_CPU_ANDI(u32 instr, u32 rtVal, u32 rsVal) { // Rt = Rs & Imm - Validate(&CPU_reg[rs(instr)], rsVal); - CPU_reg[rt(instr)] = CPU_reg[rs(instr)]; + psx_value vRt; + PGXP_value ret; - CPU_reg[rt(instr)].y = 0.f; // remove upper 16-bits + Validate(&CPU_reg[rs(instr)], rsVal); + ret = CPU_reg[rs(instr)]; + + vRt.d = rtVal; + + ret.y = 0.f; // remove upper 16-bits switch (imm(instr)) { case 0: // if 0 then x == 0 - CPU_reg[rt(instr)].x = 0.f; + ret.x = 0.f; break; case 0xFFFF: - // if saturated then x = x + // if saturated then x == x break; default: - // x is undefined, invalidate value - CPU_reg[rt(instr)].flags = 0; + // otherwise x is low precision value + ret.x = vRt.sw.l; + ret.flags |= VALID_0; } + ret.flags |= VALID_1; + + CPU_reg[rt(instr)] = ret; CPU_reg[rt(instr)].value = rtVal; } void PGXP_CPU_ORI(u32 instr, u32 rtVal, u32 rsVal) { // Rt = Rs | Imm + psx_value vRt; + PGXP_value ret; + Validate(&CPU_reg[rs(instr)], rsVal); - CPU_reg[rt(instr)] = CPU_reg[rs(instr)]; + ret = CPU_reg[rs(instr)]; - // Invalidate on non-zero values for now - if (imm(instr) != 0) - CPU_reg[rt(instr)].flags = 0; + vRt.d = rtVal; - CPU_reg[rt(instr)].value = rtVal; + switch (imm(instr)) + { + case 0: + // if 0 then x == x + break; + default: + // otherwise x is low precision value + ret.x = vRt.sw.l; + ret.flags |= VALID_0; + } + + ret.value = rtVal; + CPU_reg[rt(instr)] = ret; } void PGXP_CPU_XORI(u32 instr, u32 rtVal, u32 rsVal) { // Rt = Rs ^ Imm + psx_value vRt; + PGXP_value ret; + Validate(&CPU_reg[rs(instr)], rsVal); - CPU_reg[rt(instr)] = CPU_reg[rs(instr)]; + ret = CPU_reg[rs(instr)]; - // Invalidate on non-zero values for now - if (imm(instr) != 0) - CPU_reg[rt(instr)].flags = 0; + vRt.d = rtVal; - CPU_reg[rt(instr)].value = rtVal; + switch (imm(instr)) + { + case 0: + // if 0 then x == x + break; + default: + // otherwise x is low precision value + ret.x = vRt.sw.l; + ret.flags |= 
VALID_0; + } + + ret.value = rtVal; + CPU_reg[rt(instr)] = ret; } void PGXP_CPU_SLTI(u32 instr, u32 rtVal, u32 rsVal) { // Rt = Rs < Imm (signed) psx_value tempImm; + PGXP_value ret; Validate(&CPU_reg[rs(instr)], rsVal); - CPU_reg[rt(instr)] = CPU_reg[rs(instr)]; + ret = CPU_reg[rs(instr)]; tempImm.w.h = imm(instr); - CPU_reg[rt(instr)].y = 0.f; - CPU_reg[rt(instr)].x = (CPU_reg[rs(instr)].x < tempImm.sw.h) ? 1.f : 0.f; + ret.y = 0.f; + ret.x = (CPU_reg[rs(instr)].x < tempImm.sw.h) ? 1.f : 0.f; + ret.flags |= VALID_1; + ret.value = rtVal; - CPU_reg[rt(instr)].value = rtVal; + CPU_reg[rt(instr)] = ret; } void PGXP_CPU_SLTIU(u32 instr, u32 rtVal, u32 rsVal) { - // Rt = Rs < Imm (signed) + // Rt = Rs < Imm (Unsigned) + psx_value tempImm; + PGXP_value ret; + Validate(&CPU_reg[rs(instr)], rsVal); - CPU_reg[rt(instr)] = CPU_reg[rs(instr)]; + ret = CPU_reg[rs(instr)]; - CPU_reg[rt(instr)].y = 0.f; - CPU_reg[rt(instr)].x = (fabs(CPU_reg[rs(instr)].x) < (u32)imm(instr)) ? 1.f : 0.f; + tempImm.w.h = imm(instr); + ret.y = 0.f; + ret.x = (f16Unsign(CPU_reg[rs(instr)].x) < tempImm.w.h) ? 1.f : 0.f; + ret.flags |= VALID_1; + ret.value = rtVal; - CPU_reg[rt(instr)].value = rtVal; + CPU_reg[rt(instr)] = ret; } //////////////////////////////////// @@ -197,11 +246,13 @@ void PGXP_CPU_LUI(u32 instr, u32 rtVal) CPU_reg[rt(instr)].y = (float)(s16)imm(instr); CPU_reg[rt(instr)].hFlags = VALID_HALF; CPU_reg[rt(instr)].value = rtVal; + CPU_reg[rt(instr)].flags = VALID_01; } //////////////////////////////////// // Register Arithmetic //////////////////////////////////// + void PGXP_CPU_ADD(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) { // Rd = Rs + Rt (signed) @@ -218,8 +269,19 @@ void PGXP_CPU_ADD(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) ret = CPU_reg[rs(instr)]; - ret.x += CPU_reg[rt(instr)].x; - ret.y += CPU_reg[rt(instr)].y; + ret.x = f16Unsign(ret.x); + ret.x += f16Unsign(CPU_reg[rt(instr)].x); + + // carry on over/underflow + float of = (ret.x > USHRT_MAX) ? 1.f : (ret.x < 0) ? 
-1.f : 0.f; + ret.x = f16Sign(ret.x); + //ret.x -= of * (USHRT_MAX + 1); + ret.y += CPU_reg[rt(instr)].y + of; + + // truncate on overflow/underflow + ret.y += (ret.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (ret.y < SHRT_MIN) ? USHRT_MAX + 1 : 0.f; + + // TODO: decide which "z/w" component to use ret.halfFlags[0] &= CPU_reg[rt(instr)].halfFlags[0]; ret.gFlags |= CPU_reg[rt(instr)].gFlags; @@ -253,8 +315,17 @@ void PGXP_CPU_SUB(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) ret = CPU_reg[rs(instr)]; - ret.x -= CPU_reg[rt(instr)].x; - ret.y -= CPU_reg[rt(instr)].y; + ret.x = f16Unsign(ret.x); + ret.x -= f16Unsign(CPU_reg[rt(instr)].x); + + // carry on over/underflow + float of = (ret.x > USHRT_MAX) ? 1.f : (ret.x < 0) ? -1.f : 0.f; + ret.x = f16Sign(ret.x); + //ret.x -= of * (USHRT_MAX + 1); + ret.y -= CPU_reg[rt(instr)].y - of; + + // truncate on overflow/underflow + ret.y += (ret.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (ret.y < SHRT_MIN) ? USHRT_MAX + 1 : 0.f; ret.halfFlags[0] &= CPU_reg[rt(instr)].halfFlags[0]; ret.gFlags |= CPU_reg[rt(instr)].gFlags; @@ -348,6 +419,18 @@ void PGXP_CPU_AND(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) // ret.valid = 1; // /iCB Hack + // Get a valid W + if ((CPU_reg[rs(instr)].flags & VALID_2) == VALID_2) + { + ret.z = CPU_reg[rs(instr)].z; + ret.compFlags[2] = CPU_reg[rs(instr)].compFlags[2]; + } + else if((CPU_reg[rt(instr)].flags & VALID_2) == VALID_2) + { + ret.z = CPU_reg[rt(instr)].z; + ret.compFlags[2] = CPU_reg[rt(instr)].compFlags[2]; + } + ret.value = rdVal; CPU_reg[rd(instr)] = ret; } @@ -385,10 +468,10 @@ void PGXP_CPU_SLT(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) } ret = CPU_reg[rs(instr)]; - - // TODO: fix for single or double values? ret.y = 0.f; - ret.x = (CPU_reg[rs(instr)].x < CPU_reg[rt(instr)].x) ? 1.f : 0.f; + ret.compFlags[1] = VALID; + + ret.x = (CPU_reg[rs(instr)].y < CPU_reg[rt(instr)].y) ? 1.f : (f16Unsign(CPU_reg[rs(instr)].x) < f16Unsign(CPU_reg[rt(instr)].x)) ? 
1.f : 0.f; ret.value = rdVal; CPU_reg[rd(instr)] = ret; @@ -409,9 +492,10 @@ void PGXP_CPU_SLTU(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) } ret = CPU_reg[rs(instr)]; - ret.y = 0.f; - ret.x = (fabs(CPU_reg[rs(instr)].x) < fabs(CPU_reg[rt(instr)].x)) ? 1.f : 0.f; + ret.compFlags[1] = VALID; + + ret.x = (f16Unsign(CPU_reg[rs(instr)].y) < f16Unsign(CPU_reg[rt(instr)].y)) ? 1.f : (f16Unsign(CPU_reg[rs(instr)].x) < f16Unsign(CPU_reg[rt(instr)].x)) ? 1.f : 0.f; ret.value = rdVal; CPU_reg[rd(instr)] = ret; @@ -420,6 +504,7 @@ void PGXP_CPU_SLTU(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) //////////////////////////////////// // Register mult/div //////////////////////////////////// + void PGXP_CPU_MULT(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal) { // Hi/Lo = Rs * Rt (signed) @@ -433,14 +518,36 @@ void PGXP_CPU_MULT(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal) MakeValid(&CPU_reg[rt(instr)], rtVal); } - float vs = CPU_reg[rs(instr)].y + (CPU_reg[rs(instr)].x / (float)(1 << 16)); - float vt = CPU_reg[rt(instr)].y + (CPU_reg[rt(instr)].x / (float)(1 << 16)); - - CPU_Hi.x = vs * vt;// CPU_reg[rs(instr)].y * CPU_reg[rt(instr)].y; - CPU_Lo.y = (CPU_Hi.x - ((s32)CPU_Hi.x)) * (float)(1 << 16);// CPU_reg[rs(instr)].x * CPU_reg[rt(instr)].x; // Get fractional part + CPU_Lo = CPU_Hi = CPU_reg[rs(instr)]; CPU_Lo.halfFlags[0] = CPU_Hi.halfFlags[0] = (CPU_reg[rs(instr)].halfFlags[0] & CPU_reg[rt(instr)].halfFlags[0]); + double xx, xy, yx, yy; + double lx = 0, ly = 0, hx = 0, hy = 0; + s64 of = 0; + + // Multiply out components + xx = f16Unsign(CPU_reg[rs(instr)].x) * f16Unsign(CPU_reg[rt(instr)].x); + xy = f16Unsign(CPU_reg[rs(instr)].x) * (CPU_reg[rt(instr)].y); + yx = (CPU_reg[rs(instr)].y) * f16Unsign(CPU_reg[rt(instr)].x); + yy = (CPU_reg[rs(instr)].y) * (CPU_reg[rt(instr)].y); + + // Split values into outputs + lx = xx; + + ly = f16Overflow(xx); + ly += xy + yx; + + hx = f16Overflow(ly); + hx += yy; + + hy = f16Overflow(hx); + + CPU_Lo.x = f16Sign(lx); + 
CPU_Lo.y = f16Sign(ly); + CPU_Hi.x = f16Sign(hx); + CPU_Hi.y = f16Sign(hy); + CPU_Lo.value = loVal; CPU_Hi.value = hiVal; } @@ -458,22 +565,44 @@ void PGXP_CPU_MULTU(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal) MakeValid(&CPU_reg[rt(instr)], rtVal); } - float vs = fabs(CPU_reg[rs(instr)].y) + (fabs(CPU_reg[rs(instr)].x) / (float)(1 << 16)); - float vt = fabs(CPU_reg[rt(instr)].y) + (fabs(CPU_reg[rt(instr)].x) / (float)(1 << 16)); - - CPU_Hi.x = vs * vt;// fabs(CPU_reg[rs(instr)].y) * fabs(CPU_reg[rt(instr)].y); - CPU_Lo.y = (CPU_Hi.x - ((s32)CPU_Hi.x)) * (float)(1 << 16);// fabs(CPU_reg[rs(instr)].x) * fabs(CPU_reg[rt(instr)].x); // Get fractional part + CPU_Lo = CPU_Hi = CPU_reg[rs(instr)]; CPU_Lo.halfFlags[0] = CPU_Hi.halfFlags[0] = (CPU_reg[rs(instr)].halfFlags[0] & CPU_reg[rt(instr)].halfFlags[0]); + double xx, xy, yx, yy; + double lx = 0, ly = 0, hx = 0, hy = 0; + s64 of = 0; + + // Multiply out components + xx = f16Unsign(CPU_reg[rs(instr)].x) * f16Unsign(CPU_reg[rt(instr)].x); + xy = f16Unsign(CPU_reg[rs(instr)].x) * f16Unsign(CPU_reg[rt(instr)].y); + yx = f16Unsign(CPU_reg[rs(instr)].y) * f16Unsign(CPU_reg[rt(instr)].x); + yy = f16Unsign(CPU_reg[rs(instr)].y) * f16Unsign(CPU_reg[rt(instr)].y); + + // Split values into outputs + lx = xx; + + ly = f16Overflow(xx); + ly += xy + yx; + + hx = f16Overflow(ly); + hx += yy; + + hy = f16Overflow(hx); + + CPU_Lo.x = f16Sign(lx); + CPU_Lo.y = f16Sign(ly); + CPU_Hi.x = f16Sign(hx); + CPU_Hi.y = f16Sign(hy); + CPU_Lo.value = loVal; CPU_Hi.value = hiVal; } void PGXP_CPU_DIV(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal) { - // Hi = Rs / Rt (signed) - // Lo = Rs % Rt (signed) + // Lo = Rs / Rt (signed) + // Hi = Rs % Rt (signed) Validate(&CPU_reg[rs(instr)], rsVal); Validate(&CPU_reg[rt(instr)], rtVal); @@ -484,23 +613,29 @@ void PGXP_CPU_DIV(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal) MakeValid(&CPU_reg[rt(instr)], rtVal); } - float vs = CPU_reg[rs(instr)].y + (CPU_reg[rs(instr)].x / 
(float)(1 << 16)); - float vt = CPU_reg[rt(instr)].y + (CPU_reg[rt(instr)].x / (float)(1 << 16)); - - CPU_Lo.x = vs / vt; - CPU_Hi.x = fmod(vs, vt); - CPU_Lo.x -= CPU_Hi.x; + CPU_Lo = CPU_Hi = CPU_reg[rs(instr)]; CPU_Lo.halfFlags[0] = CPU_Hi.halfFlags[0] = (CPU_reg[rs(instr)].halfFlags[0] & CPU_reg[rt(instr)].halfFlags[0]); + double vs = f16Unsign(CPU_reg[rs(instr)].x) + (CPU_reg[rs(instr)].y) * (double)(1 << 16); + double vt = f16Unsign(CPU_reg[rt(instr)].x) + (CPU_reg[rt(instr)].y) * (double)(1 << 16); + + double lo = vs / vt; + CPU_Lo.y = f16Sign(f16Overflow(lo)); + CPU_Lo.x = f16Sign(lo); + + double hi = fmod(vs, vt); + CPU_Hi.y = f16Sign(f16Overflow(hi)); + CPU_Hi.x = f16Sign(hi); + CPU_Lo.value = loVal; CPU_Hi.value = hiVal; } void PGXP_CPU_DIVU(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal) { - // Hi = Rs / Rt (unsigned) - // Lo = Rs % Rt (unsigned) + // Lo = Rs / Rt (unsigned) + // Hi = Rs % Rt (unsigned) Validate(&CPU_reg[rs(instr)], rsVal); Validate(&CPU_reg[rt(instr)], rtVal); @@ -511,15 +646,21 @@ void PGXP_CPU_DIVU(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal) MakeValid(&CPU_reg[rt(instr)], rtVal); } - float vs = CPU_reg[rs(instr)].y + (CPU_reg[rs(instr)].x / (float)(1 << 16)); - float vt = CPU_reg[rt(instr)].y + (CPU_reg[rt(instr)].x / (float)(1 << 16)); - - CPU_Lo.x = fabs(vs) / fabs(vt); - CPU_Hi.x = fmod(fabs(vs), fabs(vt)); - CPU_Lo.x -= CPU_Hi.x; + CPU_Lo = CPU_Hi = CPU_reg[rs(instr)]; CPU_Lo.halfFlags[0] = CPU_Hi.halfFlags[0] = (CPU_reg[rs(instr)].halfFlags[0] & CPU_reg[rt(instr)].halfFlags[0]); + double vs = f16Unsign(CPU_reg[rs(instr)].x) + f16Unsign(CPU_reg[rs(instr)].y) * (double)(1 << 16); + double vt = f16Unsign(CPU_reg[rt(instr)].x) + f16Unsign(CPU_reg[rt(instr)].y) * (double)(1 << 16); + + double lo = vs / vt; + CPU_Lo.y = f16Sign(f16Overflow(lo)); + CPU_Lo.x = f16Sign(lo); + + double hi = fmod(vs, vt); + CPU_Hi.y = f16Sign(f16Overflow(hi)); + CPU_Hi.x = f16Sign(hi); + CPU_Lo.value = loVal; CPU_Hi.value = hiVal; } @@ 
-533,21 +674,89 @@ void PGXP_CPU_SLL(u32 instr, u32 rdVal, u32 rtVal) PGXP_value ret; u32 sh = sa(instr); Validate(&CPU_reg[rt(instr)], rtVal); + ret = CPU_reg[rt(instr)]; - // Shift y into x? - if (sh >= 16) + // TODO: Shift flags +#if 1 + double x = f16Unsign(CPU_reg[rt(instr)].x); + double y = f16Unsign(CPU_reg[rt(instr)].y); + if (sh >= 32) { - ret.y = ret.x; - ret.x = 0; - ret.hFlags = ret.lFlags; - ret.lFlags = 0; - sh -= 16; + x = 0.f; + y = 0.f; } - - // assume multiply with no overflow - ret.x *= (float)(1 << sh); - ret.y *= (float)(1 << sh); + else if (sh == 16) + { + y = f16Sign(x); + x = 0.f; + } + else if (sh >= 16) + { + y = x * (1 << (sh - 16)); + y = f16Sign(y); + x = 0.f; + } + else + { + x = x * (1 << sh); + y = y * (1 << sh); + y += f16Overflow(x); + x = f16Sign(x); + y = f16Sign(y); + } +#else + double x = CPU_reg[rt(instr)].x, y = f16Unsign(CPU_reg[rt(instr)].y); + + psx_value iX; iX.d = rtVal; + psx_value iY; iY.d = rtVal; + + iX.w.h = 0; // remove Y + iY.w.l = 0; // remove X + + // Shift test values + psx_value dX; + dX.d = iX.d << sh; + psx_value dY; + dY.d = iY.d << sh; + + + if ((dY.sw.h == 0) || (dY.sw.h == -1)) + y = dY.sw.h; + else + y = y * (1 << sh); + + if (dX.sw.h != 0.f) + { + if (sh == 16) + { + y = x; + } + else if (sh < 16) + { + y += f16Unsign(x) / (1 << (16 - sh)); + //if (in.x < 0) + // y += 1 << (16 - sh); + } + else + { + y += x * (1 << (sh - 16)); + } + } + + // if there's anything left of X write it in + if (dX.w.l != 0.f) + x = x * (1 << sh); + else + x = 0; + + x = f16Sign(x); + y = f16Sign(y); + +#endif + + ret.x = x; + ret.y = y; ret.value = rdVal; CPU_reg[rd(instr)] = ret; @@ -559,21 +768,90 @@ void PGXP_CPU_SRL(u32 instr, u32 rdVal, u32 rtVal) PGXP_value ret; u32 sh = sa(instr); Validate(&CPU_reg[rt(instr)], rtVal); + ret = CPU_reg[rt(instr)]; - // Shift x into y? 
- if (sh >= 16) +#if 0 + double x = f16Unsign(CPU_reg[rt(instr)].x); + double y = f16Unsign(CPU_reg[rt(instr)].y); + if (sh >= 32) { - ret.x = ret.y; - ret.y = 0; - ret.lFlags = ret.hFlags; - ret.hFlags = 0; - sh -= 16; + x = y = 0.f; } - - // assume divide with no overflow - ret.x /= (float)(1 << sh); - ret.y /= (float)(1 << sh); + else if (sh >= 16) + { + x = y / (1 << (sh - 16)); + x = f16Sign(x); + y = (y < 0) ? -1.f : 0.f; // sign extend + } + else + { + x = x / (1 << sh); + + // check for potential sign extension in overflow + psx_value valt; + valt.d = rtVal; + u16 mask = 0xFFFF >> (16 - sh); + if ((valt.w.h & mask) == mask) + x += mask << (16 - sh); + else if ((valt.w.h & mask) == 0) + x = x; + else + x += y * (1 << (16 - sh));//f16Overflow(y); + + y = y / (1 << sh); + x = f16Sign(x); + y = f16Sign(y); + } +#else + double x = CPU_reg[rt(instr)].x, y = f16Unsign(CPU_reg[rt(instr)].y); + + psx_value iX; iX.d = rtVal; + psx_value iY; iY.d = rtVal; + + iX.sd = (iX.sd << 16) >> 16; // remove Y + iY.sw.l = iX.sw.h; // overwrite x with sign(x) + + // Shift test values + psx_value dX; + dX.sd = iX.sd >> sh; + psx_value dY; + dY.d = iY.d >> sh; + + if (dX.sw.l != iX.sw.h) + x = x / (1 << sh); + else + x = dX.sw.l; // only sign bits left + + if (dY.sw.l != iX.sw.h) + { + if (sh == 16) + { + x = y; + } + else if (sh < 16) + { + x += y * (1 << (16 - sh)); + if (CPU_reg[rt(instr)].x < 0) + x += 1 << (16 - sh); + } + else + { + x += y / (1 << (sh - 16)); + } + } + + if ((dY.sw.h == 0) || (dY.sw.h == -1)) + y = dY.sw.h; + else + y = y / (1 << sh); + + x = f16Sign(x); + y = f16Sign(y); + +#endif + ret.x = x; + ret.y = y; ret.value = rdVal; CPU_reg[rd(instr)] = ret; @@ -582,7 +860,98 @@ void PGXP_CPU_SRL(u32 instr, u32 rdVal, u32 rtVal) void PGXP_CPU_SRA(u32 instr, u32 rdVal, u32 rtVal) { // Rd = Rt >> Sa - PGXP_CPU_SRL(instr, rdVal, rtVal); + PGXP_value ret; + u32 sh = sa(instr); + Validate(&CPU_reg[rt(instr)], rtVal); + ret = CPU_reg[rt(instr)]; + +#if 0 + double x = 
f16Unsign(CPU_reg[rt(instr)].x); + double y = (CPU_reg[rt(instr)].y); + if (sh >= 32) + { + // sign extend + x = y = (y < 0) ? -1.f : 0.f; + } + else if (sh >= 16) + { + x = y / (1 << (sh - 16)); + x = f16Sign(x); + y = (y < 0) ? -1.f : 0.f; // sign extend + } + else + { + x = x / (1 << sh); + + // check for potential sign extension in overflow + psx_value valt; + valt.d = rtVal; + u16 mask = 0xFFFF >> (16 - sh); + if ((valt.w.h & mask) == mask) + x += mask << (16 - sh); + else if ((valt.w.h & mask) == 0) + x = x; + else + x += y * (1 << (16 - sh));//f16Overflow(y); + + y = y / (1 << sh); + x = f16Sign(x); + y = f16Sign(y); + } + +#else + double x = CPU_reg[rt(instr)].x, y = CPU_reg[rt(instr)].y; + + psx_value iX; iX.d = rtVal; + psx_value iY; iY.d = rtVal; + + iX.sd = (iX.sd << 16) >> 16; // remove Y + iY.sw.l = iX.sw.h; // overwrite x with sign(x) + + // Shift test values + psx_value dX; + dX.sd = iX.sd >> sh; + psx_value dY; + dY.sd = iY.sd >> sh; + + if (dX.sw.l != iX.sw.h) + x = x / (1 << sh); + else + x = dX.sw.l; // only sign bits left + + if (dY.sw.l != iX.sw.h) + { + if (sh == 16) + { + x = y; + } + else if (sh < 16) + { + x += y * (1 << (16 - sh)); + if (CPU_reg[rt(instr)].x < 0) + x += 1 << (16 - sh); + } + else + { + x += y / (1 << (sh - 16)); + } + } + + if ((dY.sw.h == 0) || (dY.sw.h == -1)) + y = dY.sw.h; + else + y = y / (1 << sh); + + x = f16Sign(x); + y = f16Sign(y); + +#endif + + ret.x = x; + ret.y = y; + + ret.value = rdVal; + CPU_reg[rd(instr)] = ret; } //////////////////////////////////// @@ -598,19 +967,84 @@ void PGXP_CPU_SLLV(u32 instr, u32 rdVal, u32 rtVal, u32 rsVal) ret = CPU_reg[rt(instr)]; - // Shift y into x? 
- if (sh >= 16) +#if 1 + double x = f16Unsign(CPU_reg[rt(instr)].x); + double y = f16Unsign(CPU_reg[rt(instr)].y); + if (sh >= 32) { - ret.y = ret.x; - ret.x = 0; - ret.hFlags = ret.lFlags; - ret.lFlags = 0; - sh -= 16; + x = 0.f; + y = 0.f; + } + else if (sh == 16) + { + y = f16Sign(x); + x = 0.f; + } + else if (sh >= 16) + { + y = x * (1 << (sh - 16)); + y = f16Sign(y); + x = 0.f; + } + else + { + x = x * (1 << sh); + y = y * (1 << sh); + y += f16Overflow(x); + x = f16Sign(x); + y = f16Sign(y); + } +#else + double x = CPU_reg[rt(instr)].x, y = f16Unsign(CPU_reg[rt(instr)].y); + + psx_value iX; iX.d = rtVal; + psx_value iY; iY.d = rtVal; + + iX.w.h = 0; // remove Y + iY.w.l = 0; // remove X + + // Shift test values + psx_value dX; + dX.d = iX.d << sh; + psx_value dY; + dY.d = iY.d << sh; + + + if ((dY.sw.h == 0) || (dY.sw.h == -1)) + y = dY.sw.h; + else + y = y * (1 << sh); + + if (dX.sw.h != 0.f) + { + if (sh == 16) + { + y = x; + } + else if (sh < 16) + { + y += f16Unsign(x) / (1 << (16 - sh)); + //if (in.x < 0) + // y += 1 << (16 - sh); + } + else + { + y += x * (1 << (sh - 16)); + } } - // assume multiply with no overflow - ret.x *= (float)(1 << sh); - ret.y *= (float)(1 << sh); + // if there's anything left of X write it in + if (dX.w.l != 0.f) + x = x * (1 << sh); + else + x = 0; + + x = f16Sign(x); + y = f16Sign(y); + +#endif + ret.x = x; + ret.y = y; ret.value = rdVal; CPU_reg[rd(instr)] = ret; @@ -626,19 +1060,89 @@ void PGXP_CPU_SRLV(u32 instr, u32 rdVal, u32 rtVal, u32 rsVal) ret = CPU_reg[rt(instr)]; - // Shift x into y? - if (sh >= 16) +#if 0 + double x = f16Unsign(CPU_reg[rt(instr)].x); + double y = f16Unsign(CPU_reg[rt(instr)].y); + if (sh >= 32) { - ret.x = ret.y; - ret.y = 0; - ret.lFlags = ret.hFlags; - ret.hFlags = 0; - sh -= 16; + x = y = 0.f; + } + else if (sh >= 16) + { + x = y / (1 << (sh - 16)); + x = f16Sign(x); + y = (y < 0) ? 
-1.f : 0.f; // sign extend + } + else + { + x = x / (1 << sh); + + // check for potential sign extension in overflow + psx_value valt; + valt.d = rtVal; + u16 mask = 0xFFFF >> (16 - sh); + if ((valt.w.h & mask) == mask) + x += mask << (16 - sh); + else if ((valt.w.h & mask) == 0) + x = x; + else + x += y * (1 << (16 - sh));//f16Overflow(y); + + y = y / (1 << sh); + x = f16Sign(x); + y = f16Sign(y); } - // assume divide with no overflow - ret.x /= (float)(1 << sh); - ret.y /= (float)(1 << sh); +#else + double x = CPU_reg[rt(instr)].x, y = f16Unsign(CPU_reg[rt(instr)].y); + + psx_value iX; iX.d = rtVal; + psx_value iY; iY.d = rtVal; + + iX.sd = (iX.sd << 16) >> 16; // remove Y + iY.sw.l = iX.sw.h; // overwrite x with sign(x) + + // Shift test values + psx_value dX; + dX.sd = iX.sd >> sh; + psx_value dY; + dY.d = iY.d >> sh; + + if (dX.sw.l != iX.sw.h) + x = x / (1 << sh); + else + x = dX.sw.l; // only sign bits left + + if (dY.sw.l != iX.sw.h) + { + if (sh == 16) + { + x = y; + } + else if (sh < 16) + { + x += y * (1 << (16 - sh)); + if (CPU_reg[rt(instr)].x < 0) + x += 1 << (16 - sh); + } + else + { + x += y / (1 << (sh - 16)); + } + } + + if ((dY.sw.h == 0) || (dY.sw.h == -1)) + y = dY.sw.h; + else + y = y / (1 << sh); + + x = f16Sign(x); + y = f16Sign(y); + +#endif + + ret.x = x; + ret.y = y; ret.value = rdVal; CPU_reg[rd(instr)] = ret; @@ -646,7 +1150,99 @@ void PGXP_CPU_SRLV(u32 instr, u32 rdVal, u32 rtVal, u32 rsVal) void PGXP_CPU_SRAV(u32 instr, u32 rdVal, u32 rtVal, u32 rsVal) { - PGXP_CPU_SRLV(instr, rdVal, rtVal, rsVal); + // Rd = Rt >> Sa + PGXP_value ret; + u32 sh = rsVal & 0x1F; + Validate(&CPU_reg[rt(instr)], rtVal); + Validate(&CPU_reg[rs(instr)], rsVal); + + ret = CPU_reg[rt(instr)]; +#if 0 + double x = f16Unsign(CPU_reg[rt(instr)].x); + double y = f16Unsign(CPU_reg[rt(instr)].y); + if (sh >= 32) + { + x = y = 0.f; + } + else if (sh >= 16) + { + x = y / (1 << (sh - 16)); + x = f16Sign(x); + y = (y < 0) ? 
-1.f : 0.f; // sign extend + } + else + { + x = x / (1 << sh); + + // check for potential sign extension in overflow + psx_value valt; + valt.d = rtVal; + u16 mask = 0xFFFF >> (16 - sh); + if ((valt.w.h & mask) == mask) + x += mask << (16 - sh); + else if ((valt.w.h & mask) == 0) + x = x; + else + x += y * (1 << (16 - sh));//f16Overflow(y); + + y = y / (1 << sh); + x = f16Sign(x); + y = f16Sign(y); + } + +#else + double x = CPU_reg[rt(instr)].x, y = CPU_reg[rt(instr)].y; + + psx_value iX; iX.d = rtVal; + psx_value iY; iY.d = rtVal; + + iX.sd = (iX.sd << 16) >> 16; // remove Y + iY.sw.l = iX.sw.h; // overwrite x with sign(x) + + // Shift test values + psx_value dX; + dX.sd = iX.sd >> sh; + psx_value dY; + dY.sd = iY.sd >> sh; + + if (dX.sw.l != iX.sw.h) + x = x / (1 << sh); + else + x = dX.sw.l; // only sign bits left + + if (dY.sw.l != iX.sw.h) + { + if (sh == 16) + { + x = y; + } + else if (sh < 16) + { + x += y * (1 << (16 - sh)); + if (CPU_reg[rt(instr)].x < 0) + x += 1 << (16 - sh); + } + else + { + x += y / (1 << (sh - 16)); + } + } + + if ((dY.sw.h == 0) || (dY.sw.h == -1)) + y = dY.sw.h; + else + y = y / (1 << sh); + + x = f16Sign(x); + y = f16Sign(y); + +#endif + + ret.x = x; + ret.y = y; + + ret.value = rdVal; + CPU_reg[rd(instr)] = ret; } //////////////////////////////////// @@ -699,7 +1295,6 @@ void PGXP_CPU_LW(u32 instr, u32 rtVal, u32 addr) { // Rt = Mem[Rs + Im] ValidateAndCopyMem(&CPU_reg[rt(instr)], addr, rtVal); - //CPU_reg[rt(instr)] = PGXP_validateXY(ReadMem(addr), rtVal); } void PGXP_CPU_LWR(u32 instr, u32 rtVal, u32 addr) @@ -714,7 +1309,7 @@ void PGXP_CPU_LH(u32 instr, u16 rtVal, u32 addr) // Rt = Mem[Rs + Im] (sign extended) psx_value val; val.sd = (s32)(s16)rtVal; - ValidateAndCopyMem16(&CPU_reg[rt(instr)], addr, val.d); + ValidateAndCopyMem16(&CPU_reg[rt(instr)], addr, val.d, 1); } void PGXP_CPU_LHU(u32 instr, u16 rtVal, u32 addr) @@ -723,7 +1318,7 @@ void PGXP_CPU_LHU(u32 instr, u16 rtVal, u32 addr) psx_value val; val.d = rtVal; val.w.h = 
0; - ValidateAndCopyMem16(&CPU_reg[rt(instr)], addr, val.d); + ValidateAndCopyMem16(&CPU_reg[rt(instr)], addr, val.d, 0); } // Load 8-bit @@ -749,7 +1344,6 @@ void PGXP_CPU_SW(u32 instr, u32 rtVal, u32 addr) // Mem[Rs + Im] = Rt Validate(&CPU_reg[rt(instr)], rtVal); WriteMem(&CPU_reg[rt(instr)], addr); - //WriteMemOld(PGXP_validateXY(&CPU_reg[rt(instr)], rtVal), addr); } void PGXP_CPU_SWR(u32 instr, u32 rtVal, u32 addr) diff --git a/libpcsxcore/pgxp_debug.c b/libpcsxcore/pgxp_debug.c index 4bd1567e..beb12526 100644 --- a/libpcsxcore/pgxp_debug.c +++ b/libpcsxcore/pgxp_debug.c @@ -119,14 +119,14 @@ static PGXP_CPU_OpData PGXP_BSC_LUT[64] = { #define PGXP_Data_DIVU { DBG_E_DIVU, fOp_CPU_Hi | fOp_CPU_Lo, fOp_CPU_Rs | fOp_CPU_Rt, 4, 4, "/", "DIVU", (void(*)())PGXP_CPU_DIVU } // Shift operations (sa) -#define PGXP_Data_SLL { DBG_E_SLL, fOp_CPU_Rd, fOp_CPU_Rt | fOp_Sa, 2, 2, ">>", "SLL", (void(*)())PGXP_CPU_SLL } -#define PGXP_Data_SRL { DBG_E_SRL, fOp_CPU_Rd, fOp_CPU_Rt | fOp_Sa, 2, 2, "<<", "SRL", (void(*)())PGXP_CPU_SRL } -#define PGXP_Data_SRA { DBG_E_SRA, fOp_CPU_Rd, fOp_CPU_Rt | fOp_Sa, 2, 2, "<<", "SRA", (void(*)())PGXP_CPU_SRA } +#define PGXP_Data_SLL { DBG_E_SLL, fOp_CPU_Rd, fOp_CPU_Rt | fOp_Sa, 2, 2, "<<", "SLL", (void(*)())PGXP_CPU_SLL } +#define PGXP_Data_SRL { DBG_E_SRL, fOp_CPU_Rd, fOp_CPU_Rt | fOp_Sa, 2, 2, ">>", "SRL", (void(*)())PGXP_CPU_SRL } +#define PGXP_Data_SRA { DBG_E_SRA, fOp_CPU_Rd, fOp_CPU_Rt | fOp_Sa, 2, 2, ">>", "SRA", (void(*)())PGXP_CPU_SRA } // Shift operations variable -#define PGXP_Data_SLLV { DBG_E_SLLV, fOp_CPU_Rd, fOp_CPU_Rt | fOp_CPU_Rs, 3, 3, ">>", "SLLV", (void(*)())PGXP_CPU_SLLV } -#define PGXP_Data_SRLV { DBG_E_SRLV, fOp_CPU_Rd, fOp_CPU_Rt | fOp_CPU_Rs, 3, 3, "<<", "SRLV", (void(*)())PGXP_CPU_SRLV } -#define PGXP_Data_SRAV { DBG_E_SRAV, fOp_CPU_Rd, fOp_CPU_Rt | fOp_CPU_Rs, 3, 3, "<<", "SRAV", (void(*)())PGXP_CPU_SRAV } +#define PGXP_Data_SLLV { DBG_E_SLLV, fOp_CPU_Rd, fOp_CPU_Rt | fOp_CPU_Rs, 3, 3, "<<", "SLLV", 
(void(*)())PGXP_CPU_SLLV } +#define PGXP_Data_SRLV { DBG_E_SRLV, fOp_CPU_Rd, fOp_CPU_Rt | fOp_CPU_Rs, 3, 3, ">>", "SRLV", (void(*)())PGXP_CPU_SRLV } +#define PGXP_Data_SRAV { DBG_E_SRAV, fOp_CPU_Rd, fOp_CPU_Rt | fOp_CPU_Rs, 3, 3, ">>", "SRAV", (void(*)())PGXP_CPU_SRAV } // Move registers #define PGXP_Data_MFHI { DBG_E_MFHI, fOp_CPU_Rd, fOp_CPU_Hi, 2, 2, "<-", "MFHI", (void(*)())PGXP_CPU_MFHI } @@ -203,11 +203,96 @@ PGXP_CPU_OpData GetOpData(u32 instr) return pOpData; } -void PrintOperands(char* szBuffer, u32 instr, u32 flags, const char* szDelim, psx_value* psx_regs, u32* regIdx) +PGXP_value* GetReg(u32 instr, u32 flag, u32 psxValue) +{ + // iCB Hack: reorder Rs and Rt for SLLV SRLV and SRAV + if ((op(instr) == 0) && (func(instr) > 3) && (func(instr) < 8)) + flag = (flag == fOp_CPU_Rs) ? fOp_CPU_Rt : ((flag == fOp_CPU_Rt) ? fOp_CPU_Rs : flag); + // /iCB Hack + + switch (flag) + { + case fOp_CPU_Hi: + return &CPU_Hi; + case fOp_CPU_Lo: + return &CPU_Lo; + case fOp_CPU_Rd: + return &CPU_reg[rd(instr)]; + case fOp_CPU_Rs: + return &CPU_reg[rs(instr)]; + case fOp_CPU_Rt: + return &CPU_reg[rt(instr)]; + case fOp_GTE_Dd: + return &GTE_data_reg[rd(instr)]; + case fOp_GTE_Dt: + return &GTE_data_reg[rt(instr)]; + case fOp_GTE_Cd: + return &GTE_ctrl_reg[rd(instr)]; + case fOp_GTE_Ct: + return &GTE_ctrl_reg[rt(instr)]; + case fOp_CP0_Dd: + return &CP0_reg[rd(instr)]; + case fOp_CP0_Cd: + return &CP0_reg[rd(instr)]; + case fOp_Ad: + return GetPtr(psxValue); + default: + return NULL; + } +} + +void ForceValues(u32 instr, u32 flags, psx_value* psx_regs, u32 startIdx) +{ + PGXP_value* pReg = NULL; + u32 regIdx = startIdx; + + for (u32 opdIdx = 0; opdIdx < 14; opdIdx++) + { + u32 flag = 1 << opdIdx; + + // iCB: Skip Load operations as data at address is unknown + if ((flags & flag) && (flag != fOp_Ad)) + { + pReg = GetReg(instr, flag, psx_regs[regIdx].d); + + if (pReg) + { + SetValue(pReg, psx_regs[regIdx].d); + regIdx++; + } + } + } +} + +void TestValues(u32 instr, u32 flags, psx_value*
psx_regs, u32 *test_flags, u32 startIdx) +{ + PGXP_value* pReg = NULL; + u32 regIdx = startIdx; + + for (u32 opdIdx = 0; opdIdx < 14; opdIdx++) + { + u32 flag = 1 << opdIdx; + + // iCB: Skip Store operations as data at address is unknown + if ((flags & flag) && (flag != fOp_Ad)) + { + pReg = GetReg(instr, flag, psx_regs[regIdx].d); + + if (pReg) + { + test_flags[regIdx] = ValueToTolerance(pReg, psx_regs[regIdx].d, PGXP_DEBUG_TOLERANCE); + regIdx++; + } + } + } +} + +void PrintOperands(char* szBuffer, u32 instr, u32 flags, const char* szDelim, psx_value* psx_regs, u32 startIdx) { char szTempBuffer[256]; PGXP_value* pReg = NULL; psx_value psx_reg; + u32 regIdx = startIdx; char szOpdName[16]; const char* szPre = ""; @@ -217,7 +302,7 @@ void PrintOperands(char* szBuffer, u32 instr, u32 flags, const char* szDelim, ps u32 flag = 1 << opdIdx; // iCB Hack: reorder Rs and Rt for SLLV SRLV and SRAV - if ((op(instr) < 8) && (op(instr) > 3)) + if ((op(instr) == 0) && (func(instr) > 3) && (func(instr) < 8)) flag = (flag == fOp_CPU_Rs) ? fOp_CPU_Rt : ((flag == fOp_CPU_Rt) ? 
fOp_CPU_Rs : flag); // /iCB Hack @@ -228,62 +313,62 @@ void PrintOperands(char* szBuffer, u32 instr, u32 flags, const char* szDelim, ps case fOp_CPU_Hi: pReg = &CPU_Hi; sprintf(szOpdName, "Hi"); - psx_reg = psx_regs[(*regIdx)++]; + psx_reg = psx_regs[regIdx++]; break; case fOp_CPU_Lo: pReg = &CPU_Lo; sprintf(szOpdName, "Lo"); - psx_reg = psx_regs[(*regIdx)++]; + psx_reg = psx_regs[regIdx++]; break; case fOp_CPU_Rd: pReg = &CPU_reg[rd(instr)]; sprintf(szOpdName, "Rd[%d]", rd(instr)); - psx_reg = psx_regs[(*regIdx)++]; + psx_reg = psx_regs[regIdx++]; break; case fOp_CPU_Rs: pReg = &CPU_reg[rs(instr)]; sprintf(szOpdName, "Rs[%d]", rs(instr)); - psx_reg = psx_regs[(*regIdx)++]; + psx_reg = psx_regs[regIdx++]; break; case fOp_CPU_Rt: pReg = &CPU_reg[rt(instr)]; sprintf(szOpdName, "Rt[%d]", rt(instr)); - psx_reg = psx_regs[(*regIdx)++]; + psx_reg = psx_regs[regIdx++]; break; case fOp_GTE_Dd: pReg = &GTE_data_reg[rd(instr)]; sprintf(szOpdName, "GTE_Dd[%d]", rd(instr)); - psx_reg = psx_regs[(*regIdx)++]; + psx_reg = psx_regs[regIdx++]; break; case fOp_GTE_Dt: pReg = &GTE_data_reg[rt(instr)]; sprintf(szOpdName, "GTE_Dt[%d]", rt(instr)); - psx_reg = psx_regs[(*regIdx)++]; + psx_reg = psx_regs[regIdx++]; break; case fOp_GTE_Cd: pReg = &GTE_ctrl_reg[rd(instr)]; sprintf(szOpdName, "GTE_Cd[%d]", rd(instr)); - psx_reg = psx_regs[(*regIdx)++]; + psx_reg = psx_regs[regIdx++]; break; case fOp_GTE_Ct: pReg = &GTE_ctrl_reg[rt(instr)]; sprintf(szOpdName, "GTE_Ct[%d]", rt(instr)); - psx_reg = psx_regs[(*regIdx)++]; + psx_reg = psx_regs[regIdx++]; break; case fOp_CP0_Dd: pReg = &CP0_reg[rd(instr)]; sprintf(szOpdName, "CP0_Dd[%d]", rd(instr)); - psx_reg = psx_regs[(*regIdx)++]; + psx_reg = psx_regs[regIdx++]; break; case fOp_CP0_Cd: pReg = &CP0_reg[rd(instr)]; sprintf(szOpdName, "CP0_Cd[%d]", rd(instr)); - psx_reg = psx_regs[(*regIdx)++]; + psx_reg = psx_regs[regIdx++]; break; case fOp_Ad: pReg = NULL; sprintf(szOpdName, "Addr"); - psx_reg = psx_regs[(*regIdx)++]; + psx_reg = psx_regs[regIdx++];
break; case fOp_Sa: pReg = NULL; @@ -333,8 +418,9 @@ void PGXP_CPU_DebugOutput(u32 eOp, u32 instr, u32 numOps, u32 op1, u32 op2, u32 char szOutputBuffer[256]; char szInputBuffer[512]; PGXP_CPU_OpData opData = GetOpData(instr); - psx_value psx_regs[4]; - u32 regIdx = 0; + u32 test_flags[4] = { VALID_ALL, VALID_ALL, VALID_ALL, VALID_ALL }; + psx_value psx_regs[4]; + u32 inIdx = 0; psx_regs[0].d = op1; psx_regs[1].d = op2; psx_regs[2].d = op3; @@ -353,22 +439,29 @@ void PGXP_CPU_DebugOutput(u32 eOp, u32 instr, u32 numOps, u32 op1, u32 op2, u32 // /iCB Hack + // skip output arguments to find first input + for (u32 opdIdx = 0; opdIdx < 12; opdIdx++) + { + if (opData.OutputFlags & (1 << opdIdx)) + inIdx++; + } + +#ifdef PGXP_FORCE_INPUT_VALUES + ForceValues(instr, opData.InputFlags, psx_regs, inIdx); +#endif + + +#ifdef PGXP_OUTPUT_ALL // reset buffers if (pgxp_debug) { memset(szInputBuffer, 0, sizeof(szInputBuffer)); memset(szOutputBuffer, 0, sizeof(szOutputBuffer)); - // skip output arguments - for (u32 opdIdx = 0; opdIdx < 12; opdIdx++) - { - if (opData.OutputFlags & (1 << opdIdx)) - regIdx++; - } - // Print inputs - PrintOperands(szInputBuffer, instr, opData.InputFlags, opData.szOpString, psx_regs, &regIdx); + PrintOperands(szInputBuffer, instr, opData.InputFlags, opData.szOpString, psx_regs, inIdx); } +#endif // Call function if (numOps != opData.numArgs) @@ -396,19 +489,28 @@ void PGXP_CPU_DebugOutput(u32 eOp, u32 instr, u32 numOps, u32 op1, u32 op2, u32 break; } +#ifdef PGXP_TEST_OUTPUT_VALUES + TestValues(instr, opData.OutputFlags, psx_regs, test_flags, 0); +#endif//PGXP_TEST_OUTPUT_VALUES + +#ifdef PGXP_OUTPUT_ALL // Print operation details if (pgxp_debug) { sprintf(szOutputBuffer, "%s %x %x: ", opData.szOpName, op(instr), func(instr)); // Print outputs - regIdx = 0; - PrintOperands(szOutputBuffer, instr, opData.OutputFlags, "/", psx_regs, &regIdx); + PrintOperands(szOutputBuffer, instr, opData.OutputFlags, "/", psx_regs, 0); strcat(szOutputBuffer, "="); #ifdef
GTE_LOG - GTE_LOG("PGXP_Trace: %s %s|", szOutputBuffer, szInputBuffer); -#endif +#ifdef PGXP_TEST_OUTPUT_VALUES + if((test_flags[0] & test_flags[1] & VALID_01) != VALID_01) +#endif//PGXP_TEST_OUTPUT_VALUES + GTE_LOG("PGXP_Trace: %s %s|", szOutputBuffer, szInputBuffer); +#endif//GTE_LOG + } +#endif//PGXP_OUTPUT_ALL } void PGXP_psxTraceOp(u32 eOp, u32 instr) diff --git a/libpcsxcore/pgxp_debug.h b/libpcsxcore/pgxp_debug.h index b19b3560..aaac6333 100644 --- a/libpcsxcore/pgxp_debug.h +++ b/libpcsxcore/pgxp_debug.h @@ -30,7 +30,13 @@ #include "psxcommon.h" -#define PGXP_CPU_DEBUG +//#define PGXP_CPU_DEBUG +//#define PGXP_OUTPUT_ALL +//#define PGXP_FORCE_INPUT_VALUES +//#define PGXP_TEST_OUTPUT_VALUES + +#define PGXP_DEBUG_TOLERANCE 2.f + // Debug wrappers void PGXP_psxTraceOp(u32 eOp, u32 code); diff --git a/libpcsxcore/pgxp_mem.c b/libpcsxcore/pgxp_mem.c index 1613ebed..6e632310 100644 --- a/libpcsxcore/pgxp_mem.c +++ b/libpcsxcore/pgxp_mem.c @@ -139,7 +139,7 @@ void ValidateAndCopyMem(PGXP_value* dest, u32 addr, u32 value) *dest = PGXP_value_invalid_address; } -void ValidateAndCopyMem16(PGXP_value* dest, u32 addr, u32 value) +void ValidateAndCopyMem16(PGXP_value* dest, u32 addr, u32 value, int sign) { u32 validMask = 0; psx_value val, mask; @@ -174,7 +174,7 @@ void ValidateAndCopyMem16(PGXP_value* dest, u32 addr, u32 value) } // truncate value - dest->y = 0.f; + dest->y = (dest->x < 0) ? -1.f * sign : 0.f;// 0.f; dest->hFlags = 0; dest->value = value; dest->compFlags[1] = VALID; // iCB: High word is valid, just 0 @@ -216,6 +216,14 @@ void WriteMem16(PGXP_value* src, u32 addr) pVal->w.l = (u16)src->value; } + // overwrite z/w if valid + if (src->compFlags[2] == VALID) + { + dest->z = src->z; + dest->compFlags[2] = src->compFlags[2]; + } + + //dest->valid = dest->valid && src->valid; dest->gFlags |= src->gFlags; // inherit flags from both values (?) 
} diff --git a/libpcsxcore/pgxp_mem.h b/libpcsxcore/pgxp_mem.h index 5f79a166..18e14ff7 100644 --- a/libpcsxcore/pgxp_mem.h +++ b/libpcsxcore/pgxp_mem.h @@ -41,7 +41,7 @@ PGXP_value* GetPtr(u32 addr); PGXP_value* ReadMem(u32 addr); void ValidateAndCopyMem(PGXP_value* dest, u32 addr, u32 value); -void ValidateAndCopyMem16(PGXP_value* dest, u32 addr, u32 value); +void ValidateAndCopyMem16(PGXP_value* dest, u32 addr, u32 value, int sign); void WriteMem(PGXP_value* value, u32 addr); void WriteMem16(PGXP_value* src, u32 addr); diff --git a/libpcsxcore/pgxp_value.c b/libpcsxcore/pgxp_value.c index 5c46a7d4..c52a8999 100644 --- a/libpcsxcore/pgxp_value.c +++ b/libpcsxcore/pgxp_value.c @@ -1,5 +1,17 @@ #include "pgxp_value.h" +#include "limits.h" +void SetValue(PGXP_value *pV, u32 psxV) +{ + psx_value psx; + psx.d = psxV; + + pV->x = psx.sw.l; + pV->y = psx.sw.h; + pV->z = 0.f; + pV->flags = VALID_01; + pV->value = psx.d; +} void MakeValid(PGXP_value *pV, u32 psxV) { @@ -9,8 +21,8 @@ void MakeValid(PGXP_value *pV, u32 psxV) { pV->x = psx.sw.l; pV->y = psx.sw.h; - pV->z = 1.f; - pV->flags |= VALID_ALL; + pV->z = 0.f; + pV->flags |= VALID_01; pV->value = psx.d; } } @@ -18,11 +30,39 @@ void MakeValid(PGXP_value *pV, u32 psxV) void Validate(PGXP_value *pV, u32 psxV) { // assume pV is not NULL - pV->flags &= pV->value == psxV ? ALL : INV_VALID_ALL; + pV->flags &= (pV->value == psxV) ? ALL : INV_VALID_ALL; } void MaskValidate(PGXP_value *pV, u32 psxV, u32 mask, u32 validMask) { // assume pV is not NULL pV->flags &= ((pV->value & mask) == (psxV & mask)) ? 
ALL : (ALL ^ (validMask)); +} + +u32 ValueToTolerance(PGXP_value *pV, u32 psxV, float tolerance) +{ + psx_value psx; + psx.d = psxV; + u32 retFlags = VALID_ALL; + + if (fabs(pV->x - psx.sw.l) >= tolerance) + retFlags = retFlags & (VALID_1 | VALID_2 | VALID_3); + + if (fabs(pV->y - psx.sw.h) >= tolerance) + retFlags = retFlags & (VALID_0 | VALID_2 | VALID_3); + + return retFlags; +} + +/// float logical arithmetic /// + +double f16Sign(double in) { u32 s = in * (double)((u32)1 << 16); return ((double)*((s32*)&s)) / (double)((s32)1 << 16); } +double f16Unsign(double in) { return (in >= 0) ? in : ((double)in + (double)USHRT_MAX + 1); } +double fu16Trunc(double in) { u32 u = in * (double)((u32)1 << 16); return (double)u / (double)((u32)1 << 16); } +double f16Overflow(double in) +{ + double out = 0; + s64 v = ((s64)in) >> 16; + out = v; + return out; } \ No newline at end of file diff --git a/libpcsxcore/pgxp_value.h b/libpcsxcore/pgxp_value.h index e92f5f4a..68809fa2 100644 --- a/libpcsxcore/pgxp_value.h +++ b/libpcsxcore/pgxp_value.h @@ -98,10 +98,16 @@ typedef enum static const PGXP_value PGXP_value_invalid_address = { 0.f, 0.f, 0.f, 0, 0, 0, INVALID_ADDRESS, 0, 0 }; static const PGXP_value PGXP_value_zero = { 0.f, 0.f, 0.f, 0, 0, VALID_ALL, 0, 0, 0 }; -void MakeValid(PGXP_value *pV, u32 psxV); -void Validate(PGXP_value *pV, u32 psxV); -void MaskValidate(PGXP_value *pV, u32 psxV, u32 mask, u32 validMask); +void SetValue(PGXP_value *pV, u32 psxV); +void MakeValid(PGXP_value *pV, u32 psxV); +void Validate(PGXP_value *pV, u32 psxV); +void MaskValidate(PGXP_value *pV, u32 psxV, u32 mask, u32 validMask); +u32 ValueToTolerance(PGXP_value *pV, u32 psxV, float tolerance); +double f16Sign(double in); +double f16Unsign(double in); +double fu16Trunc(double in); +double f16Overflow(double in); typedef union { diff --git a/plugins/peopsxgl/pgxp_gpu.c b/plugins/peopsxgl/pgxp_gpu.c index 279f2dc9..8db849ba 100644 --- a/plugins/peopsxgl/pgxp_gpu.c +++ b/plugins/peopsxgl/pgxp_gpu.c 
@@ -305,20 +305,23 @@ int PGXP_GetVertices(unsigned int* addr, void* pOutput, int xOffs, int yOffs) pVertex[i].PGXP_flag = 1; if ((primStart[stride * i].flags & VALID_2) != VALID_2) + { pVertex[i].PGXP_flag = 6; + // __Log("GPPV No W: v:%x (%d, %d) pgxp(%f, %f)|\n", (currentAddr + 1 + (i * stride)) * 4, pPrimData[stride * i * 2], pPrimData[(stride * i * 2) + 1], primStart[stride * i].x, primStart[stride * i].y); + } // Log incorrect vertices //if (PGXP_tDebug && - // (fabs((float)pPrimData[stride * i * 2] - pVertex[i].x) > debug_tolerance) || - // (fabs((float)pPrimData[(stride * i * 2) + 1] - pVertex[i].y) > debug_tolerance)) - // __Log("GPPV: v:%x (%d, %d) pgxp(%f, %f)|\n", (currentAddr + 1 + (i * stride)) * 4, pPrimData[stride * i * 2], pPrimData[(stride * i * 2) + 1], pVertex[i].x, pVertex[i].y); + // (fabs((float)pPrimData[stride * i * 2] - primStart[stride * i].x) > debug_tolerance) || + // (fabs((float)pPrimData[(stride * i * 2) + 1] - primStart[stride * i].y) > debug_tolerance)) + // __Log("GPPV: v:%x (%d, %d) pgxp(%f, %f)|\n", (currentAddr + 1 + (i * stride)) * 4, pPrimData[stride * i * 2], pPrimData[(stride * i * 2) + 1], primStart[stride * i].x, primStart[stride * i].y); } else { // Default to low precision vertex data - if (primStart && ((primStart[stride * i].flags & VALID_01) == VALID_01) && primStart[stride * i].value != *(unsigned int*)(&pPrimData[stride * i * 2])) - pVertex[i].PGXP_flag = 6; - else + //if (primStart && ((primStart[stride * i].flags & VALID_01) == VALID_01) && primStart[stride * i].value != *(unsigned int*)(&pPrimData[stride * i * 2])) + // pVertex[i].PGXP_flag = 6; + //else pVertex[i].PGXP_flag = 2; // Look in cache for valid vertex @@ -354,6 +357,10 @@ int PGXP_GetVertices(unsigned int* addr, void* pOutput, int xOffs, int yOffs) for (unsigned i = 0; i < count; ++i) pVertex[i].w = 1; + if(PGXP_vDebug == 3) + for (unsigned i = 0; i < count; ++i) + pVertex[i].PGXP_flag = primIdx; + return 1; } @@ -361,7 +368,7 @@ int 
PGXP_GetVertices(unsigned int* addr, void* pOutput, int xOffs, int yOffs) //// Visual Debugging Functions ///////////////////////////////// unsigned int PGXP_vDebug = 0; -const unsigned int PGXP_maxDebug = 3; +const unsigned int PGXP_maxDebug = 4; const char red[4] = { 255, 0, 0, 255 }; const char blue[4] = { 0, 0, 255, 255 }; @@ -370,6 +377,9 @@ const char green[4] = { 0, 255, 0, 255 }; const char yellow[4] = { 255, 255, 0, 255 }; const char magenta[4] = { 255, 0, 255, 255 }; const char cyan[4] = { 0, 255, 255, 255 }; + +const char orange[4] = { 255, 128 ,0 ,255 }; + const char black[4] = { 0, 0, 0, 255 }; @@ -426,21 +436,57 @@ void PGXP_colour(OGLVertex* vertex) fDepth = vertex->w / (float)(0xFFFF); glColor4f(fDepth, fDepth, fDepth, 1.f); break; + case 3: + // Primitive type + switch (vertex->PGXP_flag) + { + case 0: + pColour = yellow; + break; + case 1: + pColour = blue; + break; + case 2: + pColour = red; + break; + case 3: + pColour = green; + break; + case 4: + pColour = magenta; + break; + case 6: + pColour = cyan; + break; + case 7: + pColour = orange; + default: + pColour = black; + break; + } + glColor4ubv(pColour); + break; } } -void PGXP_DrawDebugTriQuad(OGLVertex* vertex1, OGLVertex* vertex2, OGLVertex* vertex3, OGLVertex* vertex4) +int PGXP_DrawDebugTriQuad(OGLVertex* vertex1, OGLVertex* vertex2, OGLVertex* vertex3, OGLVertex* vertex4) { GLboolean bTexture = glIsEnabled(GL_TEXTURE_2D); GLfloat fColour[4]; GLint iShadeModel; + //if ((vertex1->PGXP_flag == 0) || + // (vertex2->PGXP_flag == 0) || + // (vertex3->PGXP_flag == 0) || + // (vertex4->PGXP_flag == 0)) + // return 0; + // Quit if PGXP_flag == ignore if ((vertex1->PGXP_flag == 5) || (vertex2->PGXP_flag == 5) || (vertex3->PGXP_flag == 5) || (vertex4->PGXP_flag == 5)) - return; + return 1; glGetIntegerv(GL_SHADE_MODEL, &iShadeModel); glGetFloatv(GL_CURRENT_COLOR, fColour); @@ -486,19 +532,28 @@ void PGXP_DrawDebugTriQuad(OGLVertex* vertex1, OGLVertex* vertex2, OGLVertex* ve 
glEnable(GL_TEXTURE_2D); glShadeModel(iShadeModel); + + return 1; } -void PGXP_DrawDebugTri(OGLVertex* vertex1, OGLVertex* vertex2, OGLVertex* vertex3) +int PGXP_DrawDebugTri(OGLVertex* vertex1, OGLVertex* vertex2, OGLVertex* vertex3) { GLboolean bTexture = glIsEnabled(GL_TEXTURE_2D); GLfloat fColour[4]; GLint iShadeModel; + + //if ((vertex1->PGXP_flag == 0) || + // (vertex2->PGXP_flag == 0) || + // (vertex3->PGXP_flag == 0)) + // return 0; + + // Quit if PGXP_flag == ignore if ((vertex1->PGXP_flag == 5) || (vertex2->PGXP_flag == 5) || (vertex3->PGXP_flag == 5)) - return; + return 1; glGetIntegerv(GL_SHADE_MODEL, &iShadeModel); glGetFloatv(GL_CURRENT_COLOR, fColour); @@ -540,20 +595,30 @@ void PGXP_DrawDebugTri(OGLVertex* vertex1, OGLVertex* vertex2, OGLVertex* vertex glEnable(GL_TEXTURE_2D); glShadeModel(iShadeModel); + + return 1; } -void PGXP_DrawDebugQuad(OGLVertex* vertex1, OGLVertex* vertex2, OGLVertex* vertex3, OGLVertex* vertex4) +int PGXP_DrawDebugQuad(OGLVertex* vertex1, OGLVertex* vertex2, OGLVertex* vertex3, OGLVertex* vertex4) { GLboolean bTexture = glIsEnabled(GL_TEXTURE_2D); GLfloat fColour[4]; GLint iShadeModel; + + //if ((vertex1->PGXP_flag == 0) || + // (vertex2->PGXP_flag == 0) || + // (vertex3->PGXP_flag == 0) || + // (vertex4->PGXP_flag == 0)) + // return 0; + + // Quit if PGXP_flag == ignore if ((vertex1->PGXP_flag == 5) || (vertex2->PGXP_flag == 5) || (vertex3->PGXP_flag == 5) || (vertex4->PGXP_flag == 5)) - return; + return 1; glGetIntegerv(GL_SHADE_MODEL, &iShadeModel); glGetFloatv(GL_CURRENT_COLOR, fColour); @@ -599,4 +664,6 @@ void PGXP_DrawDebugQuad(OGLVertex* vertex1, OGLVertex* vertex2, OGLVertex* verte glEnable(GL_TEXTURE_2D); glShadeModel(iShadeModel); + + return 1; } \ No newline at end of file diff --git a/plugins/peopsxgl/pgxp_gpu.h b/plugins/peopsxgl/pgxp_gpu.h index df07ff46..aacc6912 100644 --- a/plugins/peopsxgl/pgxp_gpu.h +++ b/plugins/peopsxgl/pgxp_gpu.h @@ -42,8 +42,8 @@ void PGXP_glVertexfv(GLfloat* pVertex); extern 
unsigned int PGXP_vDebug; extern unsigned int PGXP_debugFlags[4]; -void PGXP_DrawDebugTriQuad(OGLVertex* vertex1, OGLVertex* vertex2, OGLVertex* vertex3, OGLVertex* vertex4); -void PGXP_DrawDebugTri(OGLVertex* vertex1, OGLVertex* vertex2, OGLVertex* vertex3); -void PGXP_DrawDebugQuad(OGLVertex* vertex1, OGLVertex* vertex2, OGLVertex* vertex3, OGLVertex* vertex4); +int PGXP_DrawDebugTriQuad(OGLVertex* vertex1, OGLVertex* vertex2, OGLVertex* vertex3, OGLVertex* vertex4); +int PGXP_DrawDebugTri(OGLVertex* vertex1, OGLVertex* vertex2, OGLVertex* vertex3); +int PGXP_DrawDebugQuad(OGLVertex* vertex1, OGLVertex* vertex2, OGLVertex* vertex3, OGLVertex* vertex4); #endif // _PGXP_GPU_H_ diff --git a/plugins/peopsxgl/prim.c b/plugins/peopsxgl/prim.c index 8ea7d71b..7dd017e0 100755 --- a/plugins/peopsxgl/prim.c +++ b/plugins/peopsxgl/prim.c @@ -152,7 +152,7 @@ static __inline void PRIMdrawTexturedQuad(OGLVertex* vertex1, OGLVertex* vertex2 { if (PGXP_vDebug) { - PGXP_DrawDebugTriQuad(vertex1, vertex2, vertex4, vertex3); + if(PGXP_DrawDebugTriQuad(vertex1, vertex2, vertex4, vertex3)) return; } @@ -179,7 +179,7 @@ static __inline void PRIMdrawTexturedTri(OGLVertex* vertex1, OGLVertex* vertex2, { if (PGXP_vDebug) { - PGXP_DrawDebugTri(vertex1, vertex2, vertex3); + if(PGXP_DrawDebugTri(vertex1, vertex2, vertex3)) return; } @@ -202,7 +202,7 @@ static __inline void PRIMdrawTexGouraudTriColor(OGLVertex* vertex1, OGLVertex* v { if (PGXP_vDebug) { - PGXP_DrawDebugTri(vertex1, vertex2, vertex3); + if(PGXP_DrawDebugTri(vertex1, vertex2, vertex3)) return; } @@ -229,7 +229,7 @@ static __inline void PRIMdrawTexGouraudTriColorQuad(OGLVertex* vertex1, OGLVerte { if (PGXP_vDebug) { - PGXP_DrawDebugTriQuad(vertex1, vertex2, vertex4, vertex3); + if(PGXP_DrawDebugTriQuad(vertex1, vertex2, vertex4, vertex3)) return; } @@ -258,7 +258,7 @@ static __inline void PRIMdrawTri(OGLVertex* vertex1, OGLVertex* vertex2, OGLVert { if (PGXP_vDebug) { - PGXP_DrawDebugTri(vertex1, vertex2, vertex3); + 
if(PGXP_DrawDebugTri(vertex1, vertex2, vertex3)) return; } @@ -276,7 +276,7 @@ static __inline void PRIMdrawTri2(OGLVertex* vertex1, OGLVertex* vertex2, { if (PGXP_vDebug) { - PGXP_DrawDebugTriQuad(vertex1, vertex3, vertex2, vertex4); + if(PGXP_DrawDebugTriQuad(vertex1, vertex3, vertex2, vertex4)) return; } @@ -295,7 +295,7 @@ static __inline void PRIMdrawGouraudTriColor(OGLVertex* vertex1, OGLVertex* vert { if (PGXP_vDebug) { - PGXP_DrawDebugTri(vertex1, vertex2, vertex3); + if(PGXP_DrawDebugTri(vertex1, vertex2, vertex3)) return; } @@ -318,7 +318,7 @@ static __inline void PRIMdrawGouraudTri2Color(OGLVertex* vertex1, OGLVertex* ver { if (PGXP_vDebug) { - PGXP_DrawDebugTriQuad(vertex1, vertex3, vertex2, vertex4); + if(PGXP_DrawDebugTriQuad(vertex1, vertex3, vertex2, vertex4)) return; } @@ -343,7 +343,7 @@ static __inline void PRIMdrawFlatLine(OGLVertex* vertex1, OGLVertex* vertex2,OGL { if (PGXP_vDebug) { - PGXP_DrawDebugQuad(vertex1, vertex2, vertex3, vertex4); + if(PGXP_DrawDebugQuad(vertex1, vertex2, vertex3, vertex4)) return; } @@ -364,7 +364,7 @@ static __inline void PRIMdrawGouraudLine(OGLVertex* vertex1, OGLVertex* vertex2, { if (PGXP_vDebug) { - PGXP_DrawDebugQuad(vertex1, vertex2, vertex3, vertex4); + if(PGXP_DrawDebugQuad(vertex1, vertex2, vertex3, vertex4)) return; } @@ -391,7 +391,7 @@ static __inline void PRIMdrawQuad(OGLVertex* vertex1, OGLVertex* vertex2, { if (PGXP_vDebug) { - PGXP_DrawDebugQuad(vertex1, vertex2, vertex3, vertex4); + if(PGXP_DrawDebugQuad(vertex1, vertex2, vertex3, vertex4)) return; }