/***************************************************************************
 *   Copyright (C) 2016 by iCatButler                                      *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.           *
 ***************************************************************************/

/**************************************************************************
 *	pgxp_gte.c
 *	PGXP - Parallel/Precision Geometry Xform Pipeline
 *
 *	Created on: 12 Mar 2016
 *	Author: iCatButler
 ***************************************************************************/

#include "pgxp_gte.h"
#include "pgxp_value.h"
#include "pgxp_mem.h"
#include "pgxp_debug.h"
#include "pgxp_cpu.h"

#include "psxcommon.h"
#include "psxmem.h"
#include "r3000a.h"

// GTE registers: 32 data registers and 32 control registers, each tracked
// as a PGXP_value alongside the emulated integer register.
PGXP_value GTE_data_reg_mem[32];
PGXP_value GTE_ctrl_reg_mem[32];

PGXP_value* GTE_data_reg = GTE_data_reg_mem;
PGXP_value* GTE_ctrl_reg = GTE_ctrl_reg_mem;

// Reset every tracked GTE data and control register to all-zero bytes.
void PGXP_InitGTE(void)
{
	memset(GTE_data_reg_mem, 0, sizeof(GTE_data_reg_mem));
	memset(GTE_ctrl_reg_mem, 0, sizeof(GTE_ctrl_reg_mem));
}

// Instruction register decoding.
// Arguments are parenthesised so that compound expressions decode correctly.
#define op(_instr)		((_instr) >> 26)			// The op part of the instruction register
#define func(_instr)	((_instr) & 0x3F)			// The funct part of the instruction register
#define sa(_instr)		(((_instr) >> 6) & 0x1F)	// The sa part of the instruction register
#define rd(_instr)		(((_instr) >> 11) & 0x1F)	// The rd part of the instruction register
#define rt(_instr)		(((_instr) >> 16) & 0x1F)	// The rt part of the instruction register
#define rs(_instr)		(((_instr) >> 21) & 0x1F)	// The rs part of the instruction register
#define imm(_instr)		((_instr) & 0xFFFF)			// The immediate part of the instruction register

// Screen-space XY FIFO: data registers 12, 13 and 14, plus register 15 (SXYP).
#define SX0 (GTE_data_reg[ 12 ].x)
#define SY0 (GTE_data_reg[ 12 ].y)
#define SX1 (GTE_data_reg[ 13 ].x)
#define SY1 (GTE_data_reg[ 13 ].y)
#define SX2 (GTE_data_reg[ 14 ].x)
#define SY2 (GTE_data_reg[ 14 ].y)

#define SXY0 (GTE_data_reg[ 12 ])
#define SXY1 (GTE_data_reg[ 13 ])
#define SXY2 (GTE_data_reg[ 14 ])
#define SXYP (GTE_data_reg[ 15 ])

// Push a floating point vertex onto the SXY FIFO and hand it to the GPU plugin.
//   _x, _y : screen-space coordinates stored in SXY2
//   _z     : depth stored in SXY2.z; replaced by 1.f unless Config.PGXP_Texture is set
//   _v     : raw 32-bit register word stored as SXY2.value and, when
//            Config.PGXP_Cache is set, split into the x/y halves passed to
//            GPU_pgxpCacheVertex
// Every pushed vertex is tagged VALID_ALL and receives an increasing count.
void PGXP_pushSXYZ2f(float _x, float _y, float _z, unsigned int _v)
{
	static unsigned int uCount = 0;
	low_value temp;

	// push values down FIFO
	SXY0 = SXY1;
	SXY1 = SXY2;

	SXY2.x		= _x;
	SXY2.y		= _y;
	SXY2.z		= Config.PGXP_Texture ? _z : 1.f;
	SXY2.value	= _v;
	SXY2.flags	= VALID_ALL;
	SXY2.count	= uCount++;

	// cache value in GPU plugin
	temp.word = _v;
	if (Config.PGXP_Cache)
		GPU_pgxpCacheVertex(temp.x, temp.y, &SXY2);
	else
		GPU_pgxpCacheVertex(0, 0, NULL);

#ifdef GTE_LOG
	GTE_LOG("PGXP_PUSH (%f, %f) %u %u|", SXY2.x, SXY2.y, SXY2.flags, SXY2.count);
#endif
}

// Push an integer vertex onto the SXY FIFO.
// _x and _y are divided by 1 << 16 before being pushed; _z is converted to
// float unchanged. Nothing is pushed unless Config.PGXP_GTE is set.
void PGXP_pushSXYZ2s(s64 _x, s64 _y, s64 _z, u32 v)
{
	float fx, fy, fz;

	if (!Config.PGXP_GTE)
		return;

	fx = (float)(_x) / (float)(1 << 16);
	fy = (float)(_y) / (float)(1 << 16);
	fz = (float)(_z);

	PGXP_pushSXYZ2f(fx, fy, fz, v);
}

// Components of input vector n, read from the emulated GTE data registers.
#define VX(n) (psxRegs.CP2D.p[ (n) << 1 ].sw.l)
#define VY(n) (psxRegs.CP2D.p[ (n) << 1 ].sw.h)
#define VZ(n) (psxRegs.CP2D.p[ ((n) << 1) + 1 ].sw.l)

// Floating point version of the perspective transform of input vector _n.
// The vector is rotated and translated with the values held in the emulated
// control registers, projected, and the result is pushed onto the SXY FIFO
// together with the raw register word _v.
void PGXP_RTPS(u32 _n, u32 _v)
{
	// Upper bound applied to the projection factor H / SZ3
	const float h_over_w_max = (float)0x1ffff / (float)0xffff;

	// Translation
	float TRX = (s64)psxRegs.CP2C.p[5].sd;
	float TRY = (s64)psxRegs.CP2C.p[6].sd;
	float TRZ = (s64)psxRegs.CP2C.p[7].sd;

	// Rotation with 12-bit shift
	float R11 = (float)psxRegs.CP2C.p[ 0 ].sw.l / (float)(1 << 12);
	float R12 = (float)psxRegs.CP2C.p[ 0 ].sw.h / (float)(1 << 12);
	float R13 = (float)psxRegs.CP2C.p[ 1 ].sw.l / (float)(1 << 12);
	float R21 = (float)psxRegs.CP2C.p[ 1 ].sw.h / (float)(1 << 12);
	float R22 = (float)psxRegs.CP2C.p[ 2 ].sw.l / (float)(1 << 12);
	float R23 = (float)psxRegs.CP2C.p[ 2 ].sw.h / (float)(1 << 12);
	float R31 = (float)psxRegs.CP2C.p[ 3 ].sw.l / (float)(1 << 12);
	float R32 = (float)psxRegs.CP2C.p[ 3 ].sw.h / (float)(1 << 12);
	float R33 = (float)psxRegs.CP2C.p[ 4 ].sw.l / (float)(1 << 12);

	// Bring vertex into view space
	float MAC1 = TRX + (R11 * VX(_n)) + (R12 * VY(_n)) + (R13 * VZ(_n));
	float MAC2 = TRY + (R21 * VX(_n)) + (R22 * VY(_n)) + (R23 * VZ(_n));
	float MAC3 = TRZ + (R31 * VX(_n)) + (R32 * VY(_n)) + (R33 * VZ(_n));

	// Saturate to the signed 16-bit range
	float IR1 = max(min(MAC1, 0x7fff), -0x8000);
	float IR2 = max(min(MAC2, 0x7fff), -0x8000);
	float IR3 = max(min(MAC3, 0x7fff), -0x8000);

	float H = psxRegs.CP2C.p[26].sw.l;	// Near plane (control register 26)

	// Depth clamped to [0, 0xFFFF]; there is no clipping (no proper Z)
	float SZ3 = max(min(MAC3, 0xffff), 0x0000);
	//	float h_over_sz3 = H / SZ3;

	// Offsets with 16-bit shift
	float OFX = (float)psxRegs.CP2C.p[24].sd / (float)(1 << 16);
	float OFY = (float)psxRegs.CP2C.p[25].sd / (float)(1 << 16);

	// Projection factor, capped at h_over_w_max. A zero depth selects the cap
	// directly so that the division is never performed with a zero divisor.
	float h_over_w = (SZ3 == 0) ? h_over_w_max : min(H / SZ3, h_over_w_max);

	// PSX Screen space X,Y,W components
	float sx = OFX + (IR1 * h_over_w) * (Config.Widescreen ? 0.75 : 1);
	float sy = OFY + (IR2 * h_over_w);
	float sw = SZ3;	// max(SZ3, 0.1);

	sx = max(min(sx, 1024.f), -1024.f);
	sy = max(min(sy, 1024.f), -1024.f);

	// Disabled debugging aid: compare against the values already in the FIFO
	//float sx2 = SX2;
	//float sy2 = SY2;
	//float sz2 = SXY2.z;

	//float ftolerance = 5.f;

	//if ((fabs(sx - sx2) > ftolerance) ||
	//	(fabs(sy - sy2) > ftolerance) ||
	//	(fabs(sw - sz2) > ftolerance))
	//{
	//	float r = 5;
	//}

	PGXP_pushSXYZ2f(sx, sy, sw, _v);
}

// Check whether the three FIFO entries can be used for a precise NCLIP.
// Each entry is first passed through Validate() with the corresponding raw
// register word. Returns 1 when all three entries carry the VALID_012 flags,
// Config.PGXP_GTE is set and Config.PGXP_Mode is greater than 0; otherwise 0.
// NOTE: the spelling "NLCIP" is part of the public interface and is kept.
int PGXP_NLCIP_valid(u32 sxy0, u32 sxy1, u32 sxy2)
{
	Validate(&SXY0, sxy0);
	Validate(&SXY1, sxy1);
	Validate(&SXY2, sxy2);

	if (((SXY0.flags & SXY1.flags & SXY2.flags & VALID_012) == VALID_012) &&
		Config.PGXP_GTE && (Config.PGXP_Mode > 0))
		return 1;

	return 0;
}

// Floating point normal clip value computed from the three FIFO entries.
// Magnitudes strictly between 0.1 and 1 are pushed one unit further from
// zero so that the sign is not lost if the result is truncated to an integer.
float PGXP_NCLIP(void)
{
	float nclip = ((SX0 * SY1) + (SX1 * SY2) + (SX2 * SY0) -
				   (SX0 * SY2) - (SX1 * SY0) - (SX2 * SY1));

	// ensure fractional values are not incorrectly rounded to 0
	float nclipAbs = fabs(nclip);
	if ((0.1f < nclipAbs) && (nclipAbs < 1.f))
		nclip += (nclip < 0.f ? -1 : 1);

	// Disabled alternative formulation
	//float AX = SX1 - SX0;
	//float AY = SY1 - SY0;

	//float BX = SX2 - SX0;
	//float BY = SY2 - SY0;

	//// normalise A and B
	//float mA = sqrt((AX*AX) + (AY*AY));
	//float mB = sqrt((BX*BX) + (BY*BY));

	//// calculate AxB to get Z component of C
	//float CZ = ((AX * BY) - (AY * BX)) * (1 << 12);

	return nclip;
}

// Read a tracked GTE data register. Reading register 15 (SXYP) first
// refreshes it from SXY2.
// NOTE(review): no caller is visible in this file - confirm it is still needed.
static PGXP_value PGXP_MFC2_int(u32 reg)
{
	switch (reg)
	{
	case 15:
		GTE_data_reg[reg] = SXYP = SXY2;
		break;
	default:
		break;
	}

	return GTE_data_reg[reg];
}

// Write a tracked GTE data register.
// Register 15 pushes the value through the SXY FIFO; register 31 is ignored.
static void PGXP_MTC2_int(PGXP_value value, u32 reg)
{
	switch (reg)
	{
	case 15:
		// push FIFO
		SXY0 = SXY1;
		SXY1 = SXY2;
		SXY2 = value;
		SXYP = SXY2;
		break;

	case 31:
		return;

	default:
		break;
	}

	GTE_data_reg[reg] = value;
}

////////////////////////////////////
// Data transfer tracking
////////////////////////////////////

// Adjust the tracked value of data register `reg` for a read:
//   1, 3, 5, 8-11 : value becomes the sign-extended low half, y is cleared
//   7, 16-19      : value becomes the zero-extended low half, y is cleared
//   15            : register is replaced by SXY2
//   28, 29        : left untouched
void MFC2(int reg)
{
	psx_value val;
	val.d = GTE_data_reg[reg].value;

	switch (reg)
	{
	case 1:
	case 3:
	case 5:
	case 8:
	case 9:
	case 10:
	case 11:
		GTE_data_reg[reg].value = (s32)val.sw.l;
		GTE_data_reg[reg].y = 0.f;
		break;

	case 7:
	case 16:
	case 17:
	case 18:
	case 19:
		GTE_data_reg[reg].value = (u32)val.w.l;
		GTE_data_reg[reg].y = 0.f;
		break;

	case 15:
		GTE_data_reg[reg] = SXY2;
		break;

	case 28:
	case 29:
		//	psxRegs.CP2D.p[reg].d = LIM(IR1 >> 7, 0x1f, 0, 0) | (LIM(IR2 >> 7, 0x1f, 0, 0) << 5) | (LIM(IR3 >> 7, 0x1f, 0, 0) << 10);
		break;

	default:
		break;
	}
}

// MFC2: CPU[Rt] = GTE_D[Rd]
// The tracked data register is validated against rdVal, copied to the CPU
// register, and the copy's raw value is set to rtVal.
void PGXP_GTE_MFC2(u32 instr, u32 rtVal, u32 rdVal)
{
	Validate(&GTE_data_reg[rd(instr)], rdVal);
	//MFC2(rd(instr));
	CPU_reg[rt(instr)] = GTE_data_reg[rd(instr)];
	CPU_reg[rt(instr)].value = rtVal;
}

// MTC2: GTE_D[Rd] = CPU[Rt]
// The tracked CPU register is validated against rtVal, written through
// PGXP_MTC2_int(), and the destination's raw value is set to rdVal.
void PGXP_GTE_MTC2(u32 instr, u32 rdVal, u32 rtVal)
{
	Validate(&CPU_reg[rt(instr)], rtVal);
	PGXP_MTC2_int(CPU_reg[rt(instr)], rd(instr));
	GTE_data_reg[rd(instr)].value = rdVal;
}

// CFC2: CPU[Rt] = GTE_C[Rd]
// The tracked control register is validated against rdVal, copied to the CPU
// register, and the copy's raw value is set to rtVal.
void PGXP_GTE_CFC2(u32 instr, u32 rtVal, u32 rdVal)
{
	Validate(&GTE_ctrl_reg[rd(instr)], rdVal);
	CPU_reg[rt(instr)] = GTE_ctrl_reg[rd(instr)];
	CPU_reg[rt(instr)].value = rtVal;
}

// CTC2: GTE_C[Rd] = CPU[Rt]
// The tracked CPU register is validated against rtVal, copied to the control
// register, and the copy's raw value is set to rdVal.
void PGXP_GTE_CTC2(u32 instr, u32 rdVal, u32 rtVal)
{
	Validate(&CPU_reg[rt(instr)], rtVal);
	GTE_ctrl_reg[rd(instr)] = CPU_reg[rt(instr)];
	GTE_ctrl_reg[rd(instr)].value = rdVal;
}

////////////////////////////////////
// Memory Access
////////////////////////////////////

// LWC2: GTE_D[Rt] = Mem[addr]
// The tracked memory value is fetched with ValidateAndCopyMem() and written
// through PGXP_MTC2_int().
void PGXP_GTE_LWC2(u32 instr, u32 rtVal, u32 addr)
{
	PGXP_value val;
	ValidateAndCopyMem(&val, addr, rtVal);
	PGXP_MTC2_int(val, rt(instr));
}

// SWC2: Mem[addr] = GTE_D[Rt]
// The tracked data register is validated against rtVal and then written to
// tracked memory.
void PGXP_GTE_SWC2(u32 instr, u32 rtVal, u32 addr)
{
	Validate(&GTE_data_reg[rt(instr)], rtVal);
	WriteMem(&GTE_data_reg[rt(instr)], addr);
}