diff options
| author | Xavier ASUS <xavi92psx@gmail.com> | 2019-10-18 00:31:54 +0200 |
|---|---|---|
| committer | Xavier ASUS <xavi92psx@gmail.com> | 2019-10-18 00:31:54 +0200 |
| commit | 268a53de823a6750d6256ee1fb1e7707b4b45740 (patch) | |
| tree | 42c1799a9a82b2f7d9790ee9fe181d72a7274751 /src/stm8 | |
| download | sdcc-gas-268a53de823a6750d6256ee1fb1e7707b4b45740.tar.gz | |
sdcc-3.9.0 fork implementing GNU assembler syntax
This fork aims to provide better support for stm8-binutils
Diffstat (limited to 'src/stm8')
| -rw-r--r-- | src/stm8/Makefile | 7 | ||||
| -rw-r--r-- | src/stm8/Makefile.in | 7 | ||||
| -rw-r--r-- | src/stm8/gen.c | 8223 | ||||
| -rw-r--r-- | src/stm8/gen.h | 90 | ||||
| -rw-r--r-- | src/stm8/main.c | 556 | ||||
| -rw-r--r-- | src/stm8/peep.c | 1236 | ||||
| -rw-r--r-- | src/stm8/peep.h | 5 | ||||
| -rw-r--r-- | src/stm8/peeph.def | 1592 | ||||
| -rw-r--r-- | src/stm8/ralloc.c | 843 | ||||
| -rw-r--r-- | src/stm8/ralloc.h | 70 | ||||
| -rw-r--r-- | src/stm8/ralloc2.cc | 608 | ||||
| -rw-r--r-- | src/stm8/stm8.vcxproj | 114 | ||||
| -rw-r--r-- | src/stm8/stm8.vcxproj.filters | 52 |
13 files changed, 13403 insertions, 0 deletions
diff --git a/src/stm8/Makefile b/src/stm8/Makefile new file mode 100644 index 0000000..cb704c7 --- /dev/null +++ b/src/stm8/Makefile @@ -0,0 +1,7 @@ + +srcdir = . +top_builddir = ../.. +top_srcdir = ../.. + +# Make all in this directory +include $(srcdir)/../port.mk diff --git a/src/stm8/Makefile.in b/src/stm8/Makefile.in new file mode 100644 index 0000000..dfb8a52 --- /dev/null +++ b/src/stm8/Makefile.in @@ -0,0 +1,7 @@ +VPATH = @srcdir@ +srcdir = @srcdir@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ + +# Make all in this directory +include $(srcdir)/../port.mk diff --git a/src/stm8/gen.c b/src/stm8/gen.c new file mode 100644 index 0000000..7bf8c22 --- /dev/null +++ b/src/stm8/gen.c @@ -0,0 +1,8223 @@ +/*------------------------------------------------------------------------- + gen.c - code generator for STM8. + + Copyright (C) 2012 - 2013, Philipp Klaus Krause pkk@spth.de, philipp@informatik.uni-frankfurt.de) + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+-------------------------------------------------------------------------*/ + +#include "ralloc.h" +#include "gen.h" + +/* Use the D macro for basic (unobtrusive) debugging messages */ +#define D(x) do if (options.verboseAsm) { x; } while (0) + +static bool regalloc_dry_run; +static unsigned int regalloc_dry_run_cost_bytes; +static unsigned int regalloc_dry_run_cost_cycles; +static unsigned int regalloc_dry_run_cycle_scale = 1; + +static struct +{ + short debugLine; + struct + { + int pushed; + int size; + int param_offset; + } stack; + bool saved; +} +G; + +enum asminst +{ + A_ADC, + A_ADD, + A_AND, + A_BCP, + A_CLR, + A_CLRW, + A_CP, + A_CPL, + A_CPLW, + A_DEC, + A_DECW, + A_INC, + A_INCW, + A_LD, + A_MOV, + A_NEG, + A_NEGW, + A_OR, + A_RLC, + A_RLCW, + A_RLWA, + A_RRC, + A_RRCW, + A_RRWA, + A_SBC, + A_SLL, + A_SLLW, + A_SRA, + A_SRAW, + A_SRL, + A_SRLW, + A_SUB, + A_SWAP, + A_TNZ, + A_TNZW, + A_XOR +}; + +static const char *asminstnames[] = +{ + "adc", + "add", + "and", + "bcp", + "clr", + "clrw", + "cp", + "cpl", + "cplw", + "dec", + "decw", + "inc", + "incw", + "ld", + "mov", + "neg", + "negw", + "or", + "rlc", + "rlcw", + "rlwa", + "rrc", + "rrcw", + "rrwa", + "sbc", + "sll", + "sllw", + "sra", + "sraw", + "srl", + "srlw", + "sub", + "swap", + "tnz", + "tnzw", + "xor" +}; + +static struct asmop asmop_a, asmop_x, asmop_y, asmop_xy, asmop_xyl, asmop_zero, asmop_one; +static struct asmop *const ASMOP_A = &asmop_a; +static struct asmop *const ASMOP_X = &asmop_x; +static struct asmop *const ASMOP_Y = &asmop_y; +static struct asmop *const ASMOP_XY = &asmop_xy; +static struct asmop *const ASMOP_XYL = &asmop_xyl; +static struct asmop *const ASMOP_ZERO = &asmop_zero; +static struct asmop *const ASMOP_ONE = &asmop_one; + +void +stm8_init_asmops (void) +{ + asmop_a.type = AOP_REG; + asmop_a.size = 1; + asmop_a.aopu.bytes[0].in_reg = TRUE; + asmop_a.aopu.bytes[0].byteu.reg = stm8_regs + A_IDX; + asmop_a.regs[A_IDX] = 0; + asmop_a.regs[XL_IDX] = -1; + 
asmop_a.regs[XH_IDX] = -1; + asmop_a.regs[YL_IDX] = -1; + asmop_a.regs[YH_IDX] = -1; + asmop_a.regs[C_IDX] = -1; + + asmop_x.type = AOP_REG; + asmop_x.size = 2; + asmop_x.aopu.bytes[0].in_reg = TRUE; + asmop_x.aopu.bytes[0].byteu.reg = stm8_regs + XL_IDX; + asmop_x.aopu.bytes[1].in_reg = TRUE; + asmop_x.aopu.bytes[1].byteu.reg = stm8_regs + XH_IDX; + asmop_x.regs[A_IDX] = -1; + asmop_x.regs[XL_IDX] = 0; + asmop_x.regs[XH_IDX] = 1; + asmop_x.regs[YL_IDX] = -1; + asmop_x.regs[YH_IDX] = -1; + asmop_x.regs[C_IDX] = -1; + + asmop_y.type = AOP_REG; + asmop_y.size = 2; + asmop_y.aopu.bytes[0].in_reg = TRUE; + asmop_y.aopu.bytes[0].byteu.reg = stm8_regs + YL_IDX; + asmop_y.aopu.bytes[1].in_reg = TRUE; + asmop_y.aopu.bytes[1].byteu.reg = stm8_regs + YH_IDX; + asmop_y.regs[A_IDX] = -1; + asmop_y.regs[XL_IDX] = -1; + asmop_y.regs[XH_IDX] = -1; + asmop_y.regs[YL_IDX] = 0; + asmop_y.regs[YH_IDX] = 1; + asmop_y.regs[C_IDX] = -1; + + asmop_xy.type = AOP_REG; + asmop_xy.size = 4; + asmop_xy.aopu.bytes[0].in_reg = TRUE; + asmop_xy.aopu.bytes[0].byteu.reg = stm8_regs + XL_IDX; + asmop_xy.aopu.bytes[1].in_reg = TRUE; + asmop_xy.aopu.bytes[1].byteu.reg = stm8_regs + XH_IDX; + asmop_xy.aopu.bytes[2].in_reg = TRUE; + asmop_xy.aopu.bytes[2].byteu.reg = stm8_regs + YL_IDX; + asmop_xy.aopu.bytes[3].in_reg = TRUE; + asmop_xy.aopu.bytes[3].byteu.reg = stm8_regs + YH_IDX; + asmop_xy.regs[A_IDX] = -1; + asmop_xy.regs[XL_IDX] = 0; + asmop_xy.regs[XH_IDX] = 1; + asmop_xy.regs[YL_IDX] = 2; + asmop_xy.regs[YH_IDX] = 3; + asmop_xy.regs[C_IDX] = -1; + + asmop_xyl.type = AOP_REG; + asmop_xyl.size = 3; + asmop_xyl.aopu.bytes[0].in_reg = TRUE; + asmop_xyl.aopu.bytes[0].byteu.reg = stm8_regs + XL_IDX; + asmop_xyl.aopu.bytes[1].in_reg = TRUE; + asmop_xyl.aopu.bytes[1].byteu.reg = stm8_regs + XH_IDX; + asmop_xyl.aopu.bytes[2].in_reg = TRUE; + asmop_xyl.aopu.bytes[2].byteu.reg = stm8_regs + YL_IDX; + /* Register-to-byte map of the 3-byte XL/XH/YL operand itself (not of asmop_xy); YH holds no byte of it. */ + asmop_xyl.regs[A_IDX] = -1; + asmop_xyl.regs[XL_IDX] = 0; + asmop_xyl.regs[XH_IDX] = 1; + 
asmop_xyl.regs[YL_IDX] = 2; + asmop_xyl.regs[YH_IDX] = -1; + asmop_xyl.regs[C_IDX] = -1; + + asmop_zero.type = AOP_LIT; + asmop_zero.size = 1; + asmop_zero.aopu.aop_lit = constVal ("0"); + asmop_zero.regs[A_IDX] = -1; + asmop_zero.regs[XL_IDX] = -1; + asmop_zero.regs[XH_IDX] = -1; + asmop_zero.regs[YL_IDX] = -1; + asmop_zero.regs[YH_IDX] = -1; + asmop_zero.regs[C_IDX] = -1; + + asmop_one.type = AOP_LIT; + asmop_one.size = 1; + asmop_one.aopu.aop_lit = constVal ("1"); + asmop_one.regs[A_IDX] = -1; + asmop_one.regs[XL_IDX] = -1; + asmop_one.regs[XH_IDX] = -1; + asmop_one.regs[YL_IDX] = -1; + asmop_one.regs[YH_IDX] = -1; + asmop_one.regs[C_IDX] = -1; +} + +void emit2 (const char *inst, const char *fmt, ...) +{ + if (!regalloc_dry_run) + { + va_list ap; + + va_start (ap, fmt); + va_emitcode (inst, fmt, ap); + va_end (ap); + } +} + +/*--------------------------------------------------------------------------*/ +/* updateCFA - update the debugger information to reflect the current */ +/* canonical frame address relative to the stack pointer */ +/*--------------------------------------------------------------------------*/ +static void +updateCFA (void) +{ + /* there is no frame unless there is a function */ + if (!currFunc) + return; + + if (options.debug && !regalloc_dry_run) + debugFile->writeFrameAddress (NULL, &stm8_regs[SP_IDX], 1 + G.stack.param_offset + G.stack.pushed); +} +// + +/*-----------------------------------------------------------------*/ +/* aopRS - asmop in register or on stack */ +/*-----------------------------------------------------------------*/ +static bool +aopRS (const asmop *aop) +{ + return (aop->type == AOP_REG || aop->type == AOP_REGSTK || aop->type == AOP_STK); +} + +/*-----------------------------------------------------------------*/ +/* aopInReg - asmop from offset in the register */ +/*-----------------------------------------------------------------*/ +static bool +aopInReg (const asmop *aop, int offset, short rIdx) +{ + if (!(aop->type 
== AOP_REG || aop->type == AOP_REGSTK)) + return (FALSE); + + if (offset >= aop->size || offset < 0) + return (FALSE); + + if (rIdx == X_IDX) + return (aopInReg (aop, offset, XL_IDX) && aopInReg (aop, offset + 1, XH_IDX)); + + if (rIdx == Y_IDX) + return (aopInReg (aop, offset, YL_IDX) && aopInReg (aop, offset + 1, YH_IDX)); + + return (aop->aopu.bytes[offset].in_reg && aop->aopu.bytes[offset].byteu.reg->rIdx == rIdx); +} + +/*-----------------------------------------------------------------*/ +/* aopOnStack - asmop from offset on stack in consecutive memory */ +/*-----------------------------------------------------------------*/ +static bool +aopOnStack (const asmop *aop, int offset, int size) +{ + int i; + long int stk_base; + + if (!(aop->type == AOP_STK || aop->type == AOP_REGSTK)) + return (FALSE); + + if (offset + size > aop->size) + return (FALSE); + + // Fully on stack? + for (i = offset; i < offset + size; i++) + if (aop->aopu.bytes[i].in_reg) + return (FALSE); + + // Consecutive? + stk_base = aop->aopu.bytes[offset].byteu.stk; + for (i = 1; i < size; i++) + if (!regalloc_dry_run && aop->aopu.bytes[offset + i].byteu.stk != stk_base - i) // Todo: Stack offsets might be unavailable during dry run (messes with addition costs, so we should have a mechanism to do it better). + return (FALSE); + + return (TRUE); +} + +/*-----------------------------------------------------------------*/ +/* aopOnStack - asmop from offset on stack (excl. extended stack) */ +/*-----------------------------------------------------------------*/ +static bool +aopOnStackNotExt (const asmop *aop, int offset, int size) +{ + return (aopOnStack (aop, offset, size) && (aop->aopu.bytes[offset].byteu.stk + G.stack.pushed <= 255 || regalloc_dry_run));// Todo: Stack offsets might be unavailable during dry run (messes with addition costs, so we should have a mechanism to do it better). 
+} + +/*-----------------------------------------------------------------*/ +/* aopSame - are two asmops in the same location? */ +/*-----------------------------------------------------------------*/ +static bool +aopSame (const asmop *aop1, int offset1, const asmop *aop2, int offset2, int size) +{ + for(; size; size--, offset1++, offset2++) + { + if (aopRS (aop1) && aopRS (aop2) && // Same register + aop1->aopu.bytes[offset1].in_reg && aop2->aopu.bytes[offset2].in_reg && + aop1->aopu.bytes[offset1].byteu.reg == aop2->aopu.bytes[offset2].byteu.reg) + continue; + + if (aopOnStack (aop1, offset1, 1) && aopOnStack (aop2, offset2, 1) && // Same stack location + aop1->aopu.bytes[offset1].byteu.stk == aop2->aopu.bytes[offset2].byteu.stk) + continue; + + if (aop1->type == AOP_LIT && aop2->type == AOP_LIT && + byteOfVal (aop1->aopu.aop_lit, offset1) == byteOfVal (aop2->aopu.aop_lit, offset2)) + continue; + + if (aop1->type == AOP_DIR && aop2->type == AOP_DIR && + offset1 == offset2 && !strcmp(aop1->aopu.aop_dir, aop2->aopu.aop_dir)) + return (TRUE); + + return (FALSE); + } + + return (TRUE); +} + +/*-----------------------------------------------------------------*/ +/* aopIsLitVal - asmop from offset is val */ +/*-----------------------------------------------------------------*/ +static bool +aopIsLitVal (const asmop *aop, int offset, int size, unsigned long long int val) +{ + wassert_bt (size <= sizeof (unsigned long long int)); // Make sure we are not testing outside of argument val. 
+ + for(; size; size--, offset++) + { + unsigned char b = val & 0xff; + val >>= 8; + + // Leading zeroes + if (aop->size <= offset && !b) + continue; + + if (aop->type != AOP_LIT) + return (FALSE); + + if (byteOfVal (aop->aopu.aop_lit, offset) != b) + return (FALSE); + } + + return (TRUE); +} + +static void +cost(unsigned int bytes, unsigned int cycles) +{ + regalloc_dry_run_cost_bytes += bytes; + regalloc_dry_run_cost_cycles += cycles * regalloc_dry_run_cycle_scale; +} + +void emitJP(const symbol *target, float probability) +{ + if (!regalloc_dry_run) + emit2 (options.model == MODEL_LARGE ? "jpf" : "jp", "%05d$", labelKey2num (target->key)); + cost (3 + (options.model == MODEL_LARGE), (1 + (options.model == MODEL_LARGE)) * probability); +} + +static const char * +aopGet(const asmop *aop, int offset) +{ + static char buffer[256]; + + /* Don't really need the value during dry runs, so save some time. */ + if (regalloc_dry_run) + return (""); + + if (offset >= aop->size) + return ("#0x00"); + + if (aop->type == AOP_LIT) + { + SNPRINTF (buffer, sizeof(buffer), "#0x%02x", byteOfVal (aop->aopu.aop_lit, offset)); + return (buffer); + } + + if (aopRS (aop) && aop->aopu.bytes[offset].in_reg) + return (aop->aopu.bytes[offset].byteu.reg->name); + + if (aopRS (aop) && !aop->aopu.bytes[offset].in_reg) + { + int soffset = aop->aopu.bytes[offset].byteu.stk + G.stack.pushed; + + if (soffset > 255) + { + long int eoffset = (long int)(aop->aopu.bytes[offset].byteu.stk) + G.stack.size - 256l; + + wassertl_bt (regalloc_dry_run || stm8_extend_stack, "Extended stack access, but y not prepared for extended stack access."); + wassertl_bt (regalloc_dry_run || eoffset >= 0l && eoffset <= 0xffffl, "Stack access out of extended stack range."); // Stack > 64K. 
+ + SNPRINTF (buffer, sizeof(buffer), "(0x%x, y)", (unsigned)eoffset); + } + else + SNPRINTF (buffer, sizeof(buffer), "(0x%02x, sp)", (unsigned)soffset); + return (buffer); + } + + if (aop->type == AOP_IMMD) + { + wassertl_bt (offset < (2 + (options.model == MODEL_LARGE)), "Immediate operand out of range"); + if (offset == 0) + SNPRINTF (buffer, sizeof(buffer), "#<(%s + %d)", aop->aopu.immd, aop->aopu.immd_off); + else + SNPRINTF (buffer, sizeof(buffer), "#((%s + %d) >> %d)", aop->aopu.immd, aop->aopu.immd_off, offset * 8); + return (buffer); + } + + if (aop->type == AOP_DIR) + { + SNPRINTF (buffer, sizeof(buffer), "%s+%d", aop->aopu.aop_dir, aop->size - 1 - offset); + return (buffer); + } + + wassert_bt (0); + return ("dummy"); +} + +static const char * +aopGet2(const asmop *aop, int offset) +{ + static char buffer[256]; + + /* Workaround for an assembler issue */ + if (regalloc_dry_run && aop->type == AOP_IMMD && offset) + cost (100, 100); + /* Don't really need the value during dry runs, so save some time. */ + if (regalloc_dry_run) + return (""); + + if (aopInReg (aop, offset, X_IDX)) + return("x"); + if (aopInReg (aop, offset, Y_IDX)) + return("y"); + + if (aop->type != AOP_LIT && !aopOnStack (aop, offset, 2) && aop->type != AOP_IMMD && aop->type != AOP_DIR) + fprintf (stderr, "Invalid aop for aopGet2. aop->type %d. 
aop->size %d.\n", aop->type, aop->size); + wassert_bt (aop->type == AOP_LIT || aopOnStack (aop, offset, 2) || aop->type == AOP_IMMD || aop->type == AOP_DIR); + + if (aop->type == AOP_LIT) + { + SNPRINTF (buffer, sizeof(buffer), "#0x%02x%02x", byteOfVal (aop->aopu.aop_lit, offset + 1), byteOfVal (aop->aopu.aop_lit, offset)); + return (buffer); + } + else if (aop->type == AOP_IMMD) + { + if (offset) + SNPRINTF (buffer, sizeof(buffer), "#((%s + %d) >> %d)", aop->aopu.immd, aop->aopu.immd_off, offset * 8); + else + SNPRINTF (buffer, sizeof(buffer), "#(%s + %d)", aop->aopu.immd, aop->aopu.immd_off); + return (buffer); + } + + return (aopGet (aop, offset + 1)); +} + +/* For operations that always have the accumulator as left operand. */ +static void +op8_cost (const asmop *op2, int offset2) +{ + AOP_TYPE op2type = op2->type; + int r2Idx = ((aopRS (op2) && op2->aopu.bytes[offset2].in_reg)) ? op2->aopu.bytes[offset2].byteu.reg->rIdx : -1; + + if (offset2 >= op2->size) + op2type = AOP_LIT; + + switch (op2type) + { + case AOP_LIT: + case AOP_IMMD: + cost (2, 1); + return; + case AOP_DIR: + cost (3, 1); + return; + case AOP_REG: + case AOP_REGSTK: + case AOP_STK: + if (r2Idx != -1) + goto error; + cost (2, 1); + return; + default: + goto error; + } +error: + fprintf(stderr, "op2 type: %d, offset %d, rIdx %d\n", op2type, offset2, r2Idx); + wassert_bt (0); + cost (8, 4 * 8); +} + +/* For 8-bit operations that have only one operand, i.e. tnz */ +static void +op_cost (const asmop *op1, int offset1) +{ + AOP_TYPE op1type; + int r1Idx; + + wassert_bt (op1); + + op1type = op1->type; + r1Idx = ((aopRS (op1) && op1->aopu.bytes[offset1].in_reg)) ? 
op1->aopu.bytes[offset1].byteu.reg->rIdx : -1; + + switch (op1type) + { + case AOP_DIR: + cost (4, 1); + return; + case AOP_REG: + case AOP_REGSTK: + case AOP_STK: + if (r1Idx == A_IDX) + { + cost (1, 1); + return; + } + if (r1Idx != -1) + goto error; + cost (2, 1); + return; + default: + goto error; + } +error: + fprintf(stderr, "op1 type: %d, offset %d, rIdx %d\n", op1type, offset1, r1Idx); + wassert_bt (0); + cost (8, 4 * 8); +} + +/* For cheap 16-bit operations that have only one operand, i.e. incw */ +static void +opw_cost (const asmop *op1, int offset1) +{ + wassert_bt (op1); + + if (aopInReg (op1, offset1, XL_IDX)) + { + cost (1, 1); + return; + } + else if (aopInReg (op1, offset1, YL_IDX)) + { + cost (2, 1); + return; + } + + wassert_bt (0); + cost (8, 4 * 8); +} + +/* For 16-bit operations that have only one operand, i.e. tnzw */ +static void +opw_cost2 (const asmop *op1, int offset1) +{ + wassert_bt (op1); + + if (aopInReg (op1, offset1, XL_IDX)) + { + cost (1, 2); + return; + } + else if (aopInReg (op1, offset1, YL_IDX)) + { + cost (2, 2); + return; + } + + wassert_bt (0); + cost (8, 4 * 8); +} + +static void +ld_cost (const asmop *op1, int offset1, const asmop *op2, int offset2) +{ + int r1Idx, r2Idx; + + AOP_TYPE op1type = op1->type; + AOP_TYPE op2type = op2->type; + + /* Costs are symmetric */ + if (aopRS (op2) || op2type == AOP_DUMMY) + { + const asmop *tmp = op1; + const int tmpo = offset1; + op1 = op2; + op2 = tmp; + offset1 = offset2; + offset2 = tmpo; + op1type = op1->type; + op2type = op2->type; + } + + r1Idx = ((aopRS (op1) && op1->aopu.bytes[offset1].in_reg)) ? op1->aopu.bytes[offset1].byteu.reg->rIdx : -1; + r2Idx = ((aopRS (op2) && op2->aopu.bytes[offset2].in_reg)) ? 
op2->aopu.bytes[offset2].byteu.reg->rIdx : -1; + + if (offset2 >= op2->size) + op2type = AOP_LIT; + + switch (op1type) + { + case AOP_REG: + case AOP_REGSTK: + case AOP_STK: + switch (op2type) + { + case AOP_LIT: + case AOP_IMMD: + if (r1Idx != A_IDX) + goto error; + cost (2, 1); + return; + case AOP_REG: + case AOP_REGSTK: + case AOP_STK: + switch (r1Idx) + { + case A_IDX: + switch (r2Idx) + { + case XL_IDX: + case XH_IDX: + cost (1, 1); + return; + case YL_IDX: + case YH_IDX: + case -1: + cost (2, 1); + return; + default: + goto error; + } + case XL_IDX: + case XH_IDX: + if (r2Idx != A_IDX) + goto error; + cost (1, 1); + return; + case YL_IDX: + case YH_IDX: + case -1: + if (r2Idx != A_IDX) + goto error; + cost (2, 1); + return; + } + case AOP_DIR: + if (r1Idx != A_IDX) + goto error; + cost (3, 2); + return; + default: + goto error; + } + case AOP_DIR: + if (r2Idx != A_IDX) + goto error; + cost (3, 2); + return; + default: + goto error; + } +error: + fprintf(stderr, "op1 type: %d, offset %d, rIdx %d\n", op1type, offset1, r1Idx); + fprintf(stderr, "op2 type: %d, offset %d, rIdx %d\n", op2type, offset2, r2Idx); + wassert_bt (0); + cost (8, 4 * 8); +} + +static void +mov_cost (const asmop *op1, const asmop *op2) +{ + if (op2->type == AOP_LIT || op2->type == AOP_IMMD) + cost (4, 1); + else + cost (5, 1); +} + +static void +emit3cost (enum asminst inst, const asmop *op1, int offset1, const asmop *op2, int offset2) +{ + switch (inst) + { + case A_ADC: + case A_ADD: + case A_AND: + case A_BCP: + op8_cost (op2, offset2); + break; + case A_CLR: + op_cost (op1, offset1); + break; + case A_CP: + op8_cost (op2, offset2); + break; + case A_CPL: + op_cost (op1, offset1); + break; + case A_INC: + case A_DEC: + op_cost (op1, offset1); + break; + case A_LD: + ld_cost (op1, offset1, op2, offset2); + break; + case A_MOV: + mov_cost (op1, op2); + break; + case A_NEG: + op_cost (op1, offset1); + break; + case A_OR: + op8_cost (op2, offset2); + break; + case A_RLC: + case A_RRC: + 
op_cost (op1, offset1); + break; + case A_SBC: + op8_cost (op2, offset2); + break; + case A_SLL: + case A_SRA: + case A_SRL: + op_cost (op1, offset1); + break; + case A_SUB: + op8_cost (op2, offset2); + break; + case A_SWAP: + case A_TNZ: + op_cost (op1, offset1); + break; + case A_XOR: + op8_cost (op2, offset2); + break; + default: + wassertl_bt (0, "Tried to get cost for unknown 8-bit instruction"); + } +} + +static void +emit3wcost (enum asminst inst, const asmop *op1, int offset1, const asmop *op2, int offset2) +{ + switch (inst) + { + case A_CLRW: + opw_cost (op1, offset1); + break; + case A_CPLW: + opw_cost2 (op1, offset1); + break; + case A_DECW: + case A_INCW: + opw_cost (op1, offset1); + break; + case A_NEGW: + case A_RLCW: + opw_cost2 (op1, offset1); + break; + case A_RLWA: + opw_cost (op1, offset1); + break; + case A_RRCW: + opw_cost2 (op1, offset1); + break; + case A_RRWA: + opw_cost (op1, offset1); + break; + case A_SLLW: + case A_SRAW: + case A_SRLW: + case A_TNZW: + opw_cost2 (op1, offset1); + break; + default: + wassertl_bt (0, "Tried to get cost for unknown 16-bit instruction"); + } +} + +static void +emit3_o (enum asminst inst, asmop *op1, int offset1, asmop *op2, int offset2) +{ + emit3cost (inst, op1, offset1, op2, offset2); + if (regalloc_dry_run) + return; + + if (op2) + { + char *l = Safe_strdup (aopGet (op1, offset1)); + emit2 (asminstnames[inst], "%s, %s", l, aopGet (op2, offset2)); + Safe_free (l); + } + else + emit2 (asminstnames[inst], "%s", aopGet (op1, offset1)); +} + +static void +emit3w_o (enum asminst inst, asmop *op1, int offset1, asmop *op2, int offset2) +{ + emit3wcost (inst, op1, offset1, op2, offset2); + if (regalloc_dry_run) + return; + + if (op2) + { + char *l = Safe_strdup (aopGet2 (op1, offset1)); + emit2 (asminstnames[inst], "%s, %s", l, aopGet2 (op2, offset2)); + Safe_free (l); + } + else + emit2 (asminstnames[inst], "%s", aopGet2 (op1, offset1)); +} + +static void +emit3 (enum asminst inst, asmop *op1, asmop *op2) +{ + 
emit3_o (inst, op1, 0, op2, 0); +} + +static void +emit3w (enum asminst inst, asmop *op1, asmop *op2) +{ + emit3w_o (inst, op1, 0, op2, 0); +} + +static bool +regFree (int idx, const iCode *ic) +{ + if (idx == X_IDX) + return (regFree (XL_IDX, ic) && regFree (XH_IDX, ic)); + if (idx == Y_IDX) + return (regFree (YL_IDX, ic) && regFree (YH_IDX, ic)); + + if ((idx == YL_IDX || idx == YH_IDX) && stm8_extend_stack) + return FALSE; + + return (!bitVectBitValue (ic->rMask, idx)); +} + +static bool +regDead (int idx, const iCode *ic) +{ + if (idx == X_IDX) + return (regDead (XL_IDX, ic) && regDead (XH_IDX, ic)); + if (idx == Y_IDX) + return (regDead (YL_IDX, ic) && regDead (YH_IDX, ic)); + + if ((idx == YL_IDX || idx == YH_IDX) && stm8_extend_stack) + return FALSE; + + return (!bitVectBitValue (ic->rSurv, idx)); +} + +/*-----------------------------------------------------------------*/ +/* newAsmop - creates a new asmOp */ +/*-----------------------------------------------------------------*/ +static asmop * +newAsmop (short type) +{ + asmop *aop; + + aop = Safe_calloc (1, sizeof (asmop)); + aop->type = type; + + aop->regs[A_IDX] = -1; + aop->regs[XL_IDX] = -1; + aop->regs[XH_IDX] = -1; + aop->regs[YL_IDX] = -1; + aop->regs[YH_IDX] = -1; + aop->regs[C_IDX] = -1; + + return (aop); +} + +/*-----------------------------------------------------------------*/ +/* freeAsmop - free up the asmop given to an operand */ +/*----------------------------------------------------------------*/ +static void +freeAsmop (operand *op) +{ + asmop *aop; + + wassert_bt (op); + + aop = op->aop; + + if (!aop) + return; + + Safe_free (aop); + + op->aop = NULL; + if (IS_SYMOP (op) && SPIL_LOC (op)) + SPIL_LOC (op)->aop = NULL; +} + +/*-----------------------------------------------------------------*/ +/* aopForSym - for a true symbol */ +/*-----------------------------------------------------------------*/ +static asmop * +aopForSym (const iCode *ic, symbol *sym) +{ + asmop *aop; + + wassert_bt 
(ic); + wassert_bt (sym); + wassert_bt (sym->etype); + + // Unlike other backends we really free asmops; to avoid a double-free, we need to support multiple asmops for the same symbol. + + if (IS_FUNC (sym->type)) + { + aop = newAsmop (AOP_IMMD); + aop->aopu.immd = sym->rname; + aop->aopu.immd_off = 0; + aop->size = getSize (sym->type); + } + /* Assign depending on the storage class */ + else if (sym->onStack || sym->iaccess) + { + int offset; + int base; + + aop = newAsmop (AOP_STK); + aop->size = getSize (sym->type); + + base = sym->stack + (sym->stack > 0 ? G.stack.param_offset : 0); + + for(offset = 0; offset < aop->size; offset++) + aop->aopu.bytes[offset].byteu.stk = base + aop->size - offset; + } + else + { + aop = newAsmop (AOP_DIR); + aop->aopu.aop_dir = sym->rname; + aop->size = getSize (sym->type); + } + + return (aop); +} + +/*-----------------------------------------------------------------*/ +/* aopForRemat - rematerializes an object */ +/*-----------------------------------------------------------------*/ +static asmop * +aopForRemat (symbol *sym) +{ + iCode *ic = sym->rematiCode; + asmop *aop; + int val = 0; + + wassert_bt (ic); + + for (;;) + { + if (ic->op == '+') + { + if (isOperandLiteral (IC_RIGHT (ic))) + { + val += (int) operandLitValue (IC_RIGHT (ic)); + ic = OP_SYMBOL (IC_LEFT (ic))->rematiCode; + } + else + { + val += (int) operandLitValue (IC_LEFT (ic)); + ic = OP_SYMBOL (IC_RIGHT (ic))->rematiCode; + } + } + else if (ic->op == '-') + { + val -= (int) operandLitValue (IC_RIGHT (ic)); + ic = OP_SYMBOL (IC_LEFT (ic))->rematiCode; + } + else if (IS_CAST_ICODE (ic)) + { + ic = OP_SYMBOL (IC_RIGHT (ic))->rematiCode; + } + else if (ic->op == ADDRESS_OF) + { + val += (int) operandLitValue (IC_RIGHT (ic)); + break; + } + else + wassert_bt (0); + } + + if (OP_SYMBOL (IC_LEFT (ic))->onStack) + { + aop = newAsmop (AOP_STL); + aop->aopu.stk_off = (long)(OP_SYMBOL (IC_LEFT (ic))->stack) + 1 + val; + } + else + { + aop = newAsmop (AOP_IMMD); + 
aop->aopu.immd = OP_SYMBOL (IC_LEFT (ic))->rname; + aop->aopu.immd_off = val; + } + + aop->size = getSize (sym->type); + + return aop; +} + +/*-----------------------------------------------------------------*/ +/* aopOp - allocates an asmop for an operand : */ +/*-----------------------------------------------------------------*/ +static void +aopOp (operand *op, const iCode *ic) +{ + symbol *sym; + unsigned int i; + + wassert_bt (op); + + /* if already has an asmop */ + if (op->aop) + return; + + /* if this a literal */ + if (IS_OP_LITERAL (op)) + { + asmop *aop = newAsmop (AOP_LIT); + aop->aopu.aop_lit = OP_VALUE (op); + aop->size = getSize (operandType (op)); + op->aop = aop; + return; + } + + sym = OP_SYMBOL (op); + + /* if this is a true symbol */ + if (IS_TRUE_SYMOP (op)) + { + op->aop = aopForSym (ic, sym); + return; + } + + /* Rematerialize symbols where all bytes are spilt. */ + if (sym->remat && (sym->isspilt || regalloc_dry_run)) + { + bool completely_spilt = TRUE; + for (i = 0; i < getSize (sym->type); i++) + if (sym->regs[i]) + completely_spilt = FALSE; + if (completely_spilt) + { + op->aop = aopForRemat (sym); + return; + } + } + + /* if the type is a conditional */ + if (sym->regType == REG_CND) + { + asmop *aop = newAsmop (AOP_CND); + op->aop = aop; + sym->aop = sym->aop; + return; + } + + /* None of the above, which only leaves temporaries. 
*/ + { + bool completely_in_regs = TRUE; + bool completely_on_stack = TRUE; + asmop *aop = newAsmop (AOP_REGSTK); + + aop->size = getSize (operandType (op)); + op->aop = aop; + + for (i = 0; i < aop->size; i++) + { + aop->aopu.bytes[i].in_reg = !!sym->regs[i]; + if (sym->regs[i]) + { + completely_on_stack = FALSE; + aop->aopu.bytes[i].byteu.reg = sym->regs[i]; + aop->regs[sym->regs[i]->rIdx] = i; + } + else if (sym->isspilt && sym->usl.spillLoc || sym->nRegs && regalloc_dry_run) + { + completely_in_regs = FALSE; + + if (!regalloc_dry_run) + { + aop->aopu.bytes[i].byteu.stk = (long int)(sym->usl.spillLoc->stack) + aop->size - i; + + if (sym->usl.spillLoc->stack + aop->size - (int)(i) <= -G.stack.pushed) + { + fprintf (stderr, "%s %d %d %d %d at ic %d\n", sym->name, (int)(sym->usl.spillLoc->stack), (int)(aop->size), (int)(i), (int)(G.stack.pushed), ic->key); + wassertl_bt (0, "Invalid stack offset."); + } + } + else + { + static long int old_base = -10; + static const symbol *old_sym = 0; + if (sym != old_sym) + { + old_base -= aop->size; + if (old_base < -100) + old_base = -10; + old_sym = sym; + } + + aop->aopu.bytes[i].byteu.stk = old_base + aop->size - i; + } + } + else // Dummy iTemp. 
        // NOTE: this span opens inside the body of a definition that begins before it.
        // The fragment up to that definition's closing brace is reproduced unchanged.
        {
          aop->type = AOP_DUMMY;
          return;
        }

      // A byte that is not in a register needs a stack slot; outside a function (or in
      // the statsg memmap) that is asserted on a real run and charged cost (180, 180).
      if (!completely_in_regs && (!currFunc || GcurMemmap == statsg))
        {
          if (!regalloc_dry_run)
            wassertl_bt (0, "Stack asmop outside of function.");
          cost (180, 180);
        }
    }

    if (completely_in_regs)
      aop->type = AOP_REG;
    else if (completely_on_stack)
      aop->type = AOP_STK;

    return;
  }
}

/*-----------------------------------------------------------------*/
/* push - emit push / pushw for part of an asmop                   */
/*                                                                 */
/* op     - operand to push                                        */
/* offset - first byte of op to push                               */
/* size   - 1 (push) or 2 (pushw, only for x or y)                 */
/*                                                                 */
/* Adds size to G.stack.pushed and calls updateCFA (), also when   */
/* one of the assertions about size / operand kind has fired.      */
/*-----------------------------------------------------------------*/
static void
push (const asmop *op, int offset, int size)
{
  if (size == 1)
    {
      // The instruction is emitted first; the operand kind only selects the cost.
      emit2 ("push", "%s", aopGet (op, offset));
      if (op->type == AOP_LIT)
        cost (2, 1);
      else if (op->type == AOP_IMMD)
        cost (2, 1);
      else if (aopInReg (op, offset, A_IDX))
        cost (1, 1);
      else if (op->type == AOP_DIR)
        cost (3, 1);
      else
        wassertl_bt (0, "Invalid aop type for size 1 for push");
    }
  else if (size == 2)
    {
      if (aopInReg (op, offset, X_IDX))
        {
          emit2 ("pushw", "x");
          cost (1, 2);
        }
      else if (aopInReg (op, offset, Y_IDX))
        {
          emit2 ("pushw", "y");
          cost (2, 2);
        }
      else
        wassertl_bt (0, "Invalid aop type for size 2 for pushw");
    }
  else
    wassertl_bt (0, "Invalid size for push/pushw");

  G.stack.pushed += size;
  updateCFA ();
}

/*-----------------------------------------------------------------*/
/* pop - emit pop / popw into part of an asmop                     */
/*                                                                 */
/* Counterpart of push (): size 1 accepts a or an AOP_DIR operand, */
/* size 2 accepts x or y. Subtracts size from G.stack.pushed and   */
/* calls updateCFA ().                                             */
/*-----------------------------------------------------------------*/
static void
pop (const asmop *op, int offset, int size)
{
  if (size == 1)
    {
      emit2 ("pop", "%s", aopGet (op, offset));
      if (aopInReg (op, offset, A_IDX))
        cost (1, 1);
      else if (op->type == AOP_DIR)
        cost (3, 1);
      else
        wassertl_bt (0, "Invalid aop type for size 1 for pop");
    }
  else if (size == 2)
    {
      if (aopInReg (op, offset, X_IDX))
        {
          emit2 ("popw", "x");
          cost (1, 2);
        }
      else if (aopInReg (op, offset, Y_IDX))
        {
          emit2 ("popw", "y");
          cost (2, 2);
        }
      else
        wassertl_bt (0, "Invalid aop type for size 2 for popw");
    }
  else
    wassertl_bt (0, "Invalid size for pop/popw");

  G.stack.pushed -= size;
  updateCFA ();
}

/*-----------------------------------------------------------------*/
/* swap_to_a - make the byte register idx accessible through a     */
/*                                                                 */
/* Nothing is emitted for a itself; xl / yl use exg, xh / yh use   */
/* rlwa on the containing 16-bit register. swap_from_a () with the */
/* same idx emits the matching instruction to undo this.           */
/*-----------------------------------------------------------------*/
void swap_to_a(int idx)
{
  switch (idx)
    {
    case A_IDX:
      break;
    case XL_IDX:
      emit2 ("exg", "a, xl");
      cost (1, 1);
      break;
    case XH_IDX:
      emit3w (A_RLWA, ASMOP_X, 0);
      break;
    case YL_IDX:
      emit2 ("exg", "a, yl");
      cost (1, 1);
      break;
    case YH_IDX:
      emit3w (A_RLWA, ASMOP_Y, 0);
      break;
    default:
      wassert_bt (0);
    }
}

/*-----------------------------------------------------------------*/
/* swap_from_a - counterpart of swap_to_a ()                       */
/*                                                                 */
/* Same dispatch as swap_to_a (), but xh / yh use rrwa instead of  */
/* rlwa.                                                           */
/*-----------------------------------------------------------------*/
void swap_from_a(int idx)
{
  switch (idx)
    {
    case A_IDX:
      break;
    case XL_IDX:
      emit2 ("exg", "a, xl");
      cost (1, 1);
      break;
    case XH_IDX:
      emit3w (A_RRWA, ASMOP_X, 0);
      break;
    case YL_IDX:
      emit2 ("exg", "a, yl");
      cost (1, 1);
      break;
    case YH_IDX:
      emit3w (A_RRWA, ASMOP_Y, 0);
      break;
    default:
      wassert_bt (0);
    }
}

/*-----------------------------------------------------------------*/
/* stack_aop - put xl, xh, yl, yh aop on stack                     */
/*                                                                 */
/* If byte i of aop is in a register other than a, the whole       */
/* 16-bit register containing it (x or y) is pushed.               */
/*                                                                 */
/* Returns ASMOP_X or ASMOP_Y when something was pushed, NULL      */
/* otherwise. *offset is written only when something was pushed:   */
/* 2 for a low byte (xl, yl), 1 for a high byte (xh, yh) -         */
/* presumably the sp-relative position of that byte after the      */
/* push; confirm against the callers.                              */
/*-----------------------------------------------------------------*/
static
const asmop *stack_aop (const asmop *aop, int i, int *offset)
{
  asmop *stacked = NULL;

  if (aopRS (aop) && !aopOnStack (aop, i, 1) && !aopInReg (aop, i, A_IDX))
    {
      if (aop->aopu.bytes[i].byteu.reg->rIdx == XL_IDX)
        {
          stacked = ASMOP_X;
          *offset = 2;
        }
      else if (aop->aopu.bytes[i].byteu.reg->rIdx == XH_IDX)
        {
          stacked = ASMOP_X;
          *offset = 1;
        }
      else if (aop->aopu.bytes[i].byteu.reg->rIdx == YL_IDX)
        {
          stacked = ASMOP_Y;
          *offset = 2;
        }
      else if (aop->aopu.bytes[i].byteu.reg->rIdx == YH_IDX)
        {
          stacked = ASMOP_Y;
          *offset = 1;
        }
      else
        wassert_bt (0);
      push (stacked, 0, 2);
    }

  return (stacked);
}

/*--------------------------------------------------------------------------*/
/* adjustStack - Adjust the stack pointer by n bytes.                       */
/*                                                                          */
/* n      - positive: emits addw sp / pop (G.stack.pushed shrinks);         */
/*          negative: emits sub sp / push (G.stack.pushed grows)            */
/* a_free - a may be overwritten (allows pop a for n == 1)                  */
/* x_free - x may be overwritten (allows pop x, or arithmetic in x)         */
/* y_free - y may be overwritten (allows arithmetic in y)                   */
/*                                                                          */
/* Loops until the whole adjustment has been emitted; every step updates    */
/* G.stack.pushed and the CFA (directly, or through push () / pop ()).      */
/*--------------------------------------------------------------------------*/
static void
adjustStack (int n, bool a_free, bool x_free, bool y_free)
{
  while (n)
    {
      // The manual is ambiguous (not even documenting if the #byte is signed), but from experimenting with the hardware it
      // seems addw sp, byte has a signed operand, while sub sp, #byte has an unsigned operand, also, in contrast to what the
      // manual states, addw sp, #byte only takes 1 cycle.

      // todo: For big n, use addition in X or Y when free.
      // Large adjustment: do the arithmetic in x and write the result back to sp in one go.
      if (abs (n) > 255 * 2 + (n > 0 || a_free) + (optimize.codeSize ? x_free : 255) && x_free)
        {
          emit2 ("ldw", "x, sp");
          emit2 (n > 0 ? "addw" : "subw", "x, #%d", abs (n));
          emit2 ("ldw", "sp, x");
          cost (5, 4);
          G.stack.pushed -= n;
          updateCFA ();
          n -= n;
        }
      // Same through y, with a higher threshold.
      else if (abs(n) > 255 * 3 + (n > 0 || a_free) + (optimize.codeSize && x_free) && y_free)
        {
          emit2 ("ldw", "y, sp");
          emit2 (n > 0 ? "addw" : "subw", "y, #%d", abs (n));
          emit2 ("ldw", "sp, y");
          cost (5, 4);
          G.stack.pushed -= n;
          updateCFA ();
          n -= n;
        }
      // Otherwise work in steps of at most 255 bytes directly on sp.
      else if (n > 255)
        {
          emit2 ("addw", "sp, #255");
          cost (2, 1);
          G.stack.pushed -= 255;
          updateCFA ();
          n -= 255;
        }
      else if (n < -255)
        {
          emit2 ("sub", "sp, #255");
          cost (2, 1);
          G.stack.pushed += 255;
          updateCFA ();
          n += 255;
        }
      else if (n == 2 && x_free && optimize.codeSize)
        {
          pop (ASMOP_X, 0, 2);  // 1 Byte, 2 cycles - cheaper than addw sp, #byte when optimizing for code size.
          n -= 2;
        }
      else if (n == 1 && a_free)
        {
          pop (ASMOP_A, 0, 1);  // 1 Byte, 1 cycle - cheaper than addw sp, #byte.
          n--;
        }
      else if (n == -2 && optimize.codeSize)
        {
          // Only sp has to move here, so x is pushed regardless of x_free.
          push (ASMOP_X, 0, 2);  // 1 Byte, 2 cycles - cheaper than addw sp, #byte when optimizing for code size.
          n += 2;
        }
      else if (n == -1)
        {
          push (ASMOP_A, 0, 1);  // 1 Byte, 1 cycle - cheaper than addw sp, #byte.
          n++;
        }
      else
        {
          emit2 (n > 0 ? "addw" : "sub", "sp, #%d", abs (n));
          cost (2, 1);
          G.stack.pushed -= n;
          updateCFA ();
          n -= n;
        }
    }
}

/*-----------------------------------------------------------------*/
/* cheapMove - Copy a byte from one asmop to another               */
/*                                                                 */
/* result, roffset - destination operand and byte index            */
/* source, soffset - source operand and byte index                 */
/* save_a          - when true, a is preserved around any use of   */
/*                   a as a temporary (swap or push / pop)         */
/*-----------------------------------------------------------------*/
static void
cheapMove (asmop *result, int roffset, asmop *source, int soffset, bool save_a)
{
  bool dummy = (result->type == AOP_DUMMY || source->type == AOP_DUMMY);

  if (aopSame (result, roffset, source, soffset, 1))
    return;
  // Literal zero into memory, stack or a: clr.
  else if (!dummy && (!aopRS (result) || aopInReg (result, roffset, A_IDX) || aopOnStack (result, roffset, 1)) && aopIsLitVal (source, soffset, 1, 0))
    emit3_o (A_CLR, result, roffset, 0, 0);
  // One side is a: plain ld.
  else if (!dummy && (aopInReg (result, roffset, A_IDX) || aopInReg (source, soffset, A_IDX)))
    emit3_o (A_LD, result, roffset, source, soffset);
  else if (result->type == AOP_DIR && (source->type == AOP_DIR || source->type == AOP_LIT))
    emit3_o (A_MOV, result, roffset, source, soffset);
  // Destination is xl / xh / yl / yh and a must survive: swap the destination byte
  // into a, load a, swap back.
  else if (aopRS (result) && !aopOnStack (result, roffset, 1) && save_a)
    {
      if (!aopInReg (result, roffset, A_IDX))
        swap_to_a (result->aopu.bytes[roffset].byteu.reg->rIdx);

      // Some special cases where swap_to_a() changed the location of the source operand.
      // NOTE: this span opens inside cheapMove (), in the branch that has just swapped the
      // destination byte into a. In both special cases a is loaded from byte 1 of the
      // 16-bit register instead of from the source's original low byte.
      if (aopInReg (result, roffset, XH_IDX) && aopInReg (source, soffset, XL_IDX))
        emit3_o (A_LD, ASMOP_A, 0, ASMOP_X, 1);
      else if (aopInReg (result, roffset, YH_IDX) && aopInReg (source, soffset, YL_IDX))
        emit3_o (A_LD, ASMOP_A, 0, ASMOP_Y, 1);
      else
        cheapMove (ASMOP_A, 0, source, soffset, FALSE);

      if (!aopInReg (result, roffset, A_IDX))
        swap_from_a (result->aopu.bytes[roffset].byteu.reg->rIdx);
    }
  else
    {
      // General case: route the byte through a, saving a on the stack if requested.
      if (save_a)
        push (ASMOP_A, 0, 1);
      if (!aopInReg (source, soffset, A_IDX) && source->type != AOP_DUMMY)
        cheapMove (ASMOP_A, 0, source, soffset, FALSE);
      if (!aopInReg (result, roffset, A_IDX) && result->type != AOP_DUMMY)
        emit3_o (A_LD, result, roffset, ASMOP_A, 0);
      if (save_a)
        pop (ASMOP_A, 0, 1);
    }
}

/*-----------------------------------------------------------------*/
/* genCopyStack - Copy the value - stack to stack only             */
/*                                                                 */
/* result, roffset  - destination operand and first byte           */
/* source, soffset  - source operand and first byte                */
/* n                - number of bytes considered                   */
/* assigned         - per-byte flags; bytes copied here (or found  */
/*                    to share a location) are set to TRUE         */
/* size             - in/out count of bytes still to be copied     */
/* a_free, x_free,                                                 */
/* y_free           - registers usable as temporaries              */
/* really_do_it_now - copy every remaining stack-to-stack byte,    */
/*                    saving x / a where needed, instead of only   */
/*                    what is possible with the free registers     */
/*                                                                 */
/* Bytes where result or source is not on the stack are skipped.   */
/*-----------------------------------------------------------------*/
static void
genCopyStack (asmop *result, int roffset, asmop *source, int soffset, int n, bool *assigned, int *size, bool a_free, bool x_free, bool y_free, bool really_do_it_now)
{
  int i;
  bool pushed_x = FALSE;

#if 0
  D (emit2("; genCopyStack", "%d %d %d", a_free, x_free, y_free));
#endif

  // First pass: identical locations and 16-bit transfers.
  for (i = 0; i < n;)
    {
      if (!aopOnStack (result, roffset + i, 1) || !aopOnStack (source, soffset + i, 1))
        {
          i++;
          continue;
        }

      // Same location.
      if (!assigned[i] &&
        result->aopu.bytes[roffset + i].byteu.stk == source->aopu.bytes[soffset + i].byteu.stk)
        {
          wassert_bt (*size >= 1);

          assigned[i] = TRUE;
          (*size)--;
          i++;
          continue;
        }

      // Could transfer two bytes at a time now.
      if (i + 1 < n &&
        !assigned[i] && !assigned[i + 1] &&
        aopOnStackNotExt (result, roffset + i, 2) && aopOnStackNotExt (source, soffset + i, 2))
        {
          wassert_bt (*size >= 2);

          // Using ldw results in substantially shorter, but somewhat slower code.
          if (!x_free && !y_free && really_do_it_now && (optimize.codeSize || !a_free && !optimize.codeSpeed))
            {
              // x stays pushed (and counts as free) until the end of this function.
              push (ASMOP_X, 0, 2);
              pushed_x = TRUE;
              x_free = TRUE;
            }

          if (y_free) // Unlike with other operations, loading between y and stk is as efficient as for x, so we try y first here.
            {
              emit2 ("ldw", "y, %s", aopGet2 (source, soffset + i));
              emit2 ("ldw", "%s, y", aopGet2 (result, roffset + i));
            }
          else if (x_free)
            {
              emit2 ("ldw", "x, %s", aopGet2 (source, soffset + i));
              emit2 ("ldw", "%s, x", aopGet2 (result, roffset + i));
            }
          else
            {
              // No 16-bit register available; the byte-wise pass below may still handle it.
              i++;
              continue;
            }
          cost (4, 4);
          assigned[i] = TRUE;
          assigned[i + 1] = TRUE;
          (*size) -= 2;
          i += 2;
        }
      else
        i++;
    }

  // Second pass: single bytes through a.
  for (i = 0; i < n; i++)
    {
      if (!aopOnStack (result, roffset + i, 1) || !aopOnStack (source, soffset + i, 1))
        continue;

      // Just one byte to transfer.
      if ((a_free || really_do_it_now) && !assigned[i] &&
        (i + 1 >= n || assigned[i + 1] || really_do_it_now))
        {
          wassert_bt (*size >= 1);
          cheapMove (result, roffset + i, source, soffset + i, !a_free);
          assigned[i] = TRUE;
          (*size)--;
        }
    }

  if (pushed_x)
    pop (ASMOP_X, 0, 2);

  wassertl_bt (*size >= 0, "genCopyStack() copied more than there is to be copied.");
}

/*-----------------------------------------------------------------*/
/* genCopy - Copy the value from one reg/stk asmop to another      */
/*                                                                 */
/* result, roffset - destination operand and first byte            */
/* source, soffset - source operand and first byte                 */
/* sizex           - number of result bytes to produce; bytes      */
/*                   beyond the end of source are zeroed           */
/* a_dead, x_dead,                                                 */
/* y_dead          - register contents not needed afterwards       */
/*                                                                 */
/* Both operands must satisfy aopRS (). The copy is done in        */
/* phases: coalesced bytes, registers to stack, stack to stack,    */
/* register shuffling, stack to stack again, stack to registers,   */
/* forced stack to stack, and finally the leading zeroes.          */
/*-----------------------------------------------------------------*/
static void
genCopy (asmop *result, int roffset, asmop *source, int soffset, int sizex, bool a_dead, bool x_dead, bool y_dead)
{
  // n: bytes actually taken from source; size: bytes still to copy; regsize: how many of
  // those come from registers.
  int i, regsize, size, n = (sizex < source->size - soffset) ? sizex : (source->size - soffset);
  bool assigned[8] = {false, false, false, false, false, false, false, false};
  bool a_free, x_free, y_free, xl_dead, xh_dead , yl_dead, yh_dead;

#if 0
  D (emit2("; genCopy", "%d %d %d", a_dead, x_dead, y_dead));
#endif

  wassertl_bt (n <= 8, "Invalid size for genCopy().");
  wassertl_bt (aopRS (source), "Invalid source type.");
  wassertl_bt (aopRS (result), "Invalid result type.");

  size = n;
  for (i = 0, regsize = 0; i < n; i++)
    regsize += source->aopu.bytes[soffset + i].in_reg;

  // A register that receives part of the result is treated as dead as well.
  // NOTE(review): the test for a uses the range [0, n), while the xl / xh / yl / yh tests
  // below use [roffset, roffset + n) - confirm whether the difference is intended.
  a_dead |= (result->regs[A_IDX] >= 0 && result->regs[A_IDX] < n);
  xl_dead = x_dead || (result->regs[XL_IDX] >= roffset && result->regs[XL_IDX] < roffset + n);
  xh_dead = x_dead || (result->regs[XH_IDX] >= roffset && result->regs[XH_IDX] < roffset + n);
  yl_dead = y_dead || (result->regs[YL_IDX] >= roffset && result->regs[YL_IDX] < roffset + n);
  yh_dead = y_dead || (result->regs[YH_IDX] >= roffset && result->regs[YH_IDX] < roffset + n);
  x_dead |= (xl_dead && xh_dead);
  y_dead |= (yl_dead && yh_dead);

  // Do nothing for coalesced bytes.
  for (i = 0; i < n; i++)
    if (result->aopu.bytes[roffset + i].in_reg && source->aopu.bytes[soffset + i].in_reg && result->aopu.bytes[roffset + i].byteu.reg == source->aopu.bytes[soffset + i].byteu.reg)
      {
        assigned[i] = true;
        regsize--;
        size--;
      }

  // Clear registers now that would be more expensive to clear later.
  if(n >= 1 && !assigned[n - 1] && sizex > n && !assigned[n] && (aopInReg (result, roffset + n - 1, X_IDX) || aopInReg (result, roffset + n - 1, Y_IDX)) && // We want to clear the high byte of x or y.
    size - regsize <= 1) // We won't need x or y for stack-to-stack copies.
    {
      const bool in_y = aopInReg (result, roffset + n - 1, Y_IDX);
      const bool yl_free = source->regs[YL_IDX] < soffset || assigned[source->regs[YL_IDX] - soffset];
      const bool yh_free = source->regs[YH_IDX] < soffset || assigned[source->regs[YH_IDX] - soffset];
      const bool xl_free = source->regs[XL_IDX] < soffset || assigned[source->regs[XL_IDX] - soffset];
      const bool xh_free = source->regs[XH_IDX] < soffset || assigned[source->regs[XH_IDX] - soffset];
      const bool y_free = yl_free && yh_free;
      const bool x_free = xl_free && xh_free;

      if (in_y ? y_free : x_free)
        {
          // Byte n (the first zero byte) is done; byte n - 1 is still copied later.
          emit3w_o (A_CLRW, result, roffset + n - 1, 0, 0);
          assigned[n] = true;
        }
    }

  // Handle stack locations that would be overwritten by data from registers
  if (result->type == AOP_STK || result->type == AOP_REGSTK)
    for (i = 0; i < n; i++)
      {
        if (assigned[i] || !aopOnStack (source, soffset + i, 1))
          continue;
        for (int j = i + 1; j < n; j++)
          {
            if (!source->aopu.bytes[soffset + j].in_reg)
              continue;
            if (!aopOnStack (result, roffset + j, 1))
              continue;
            if (result->aopu.bytes[roffset + j].byteu.stk != source->aopu.bytes[soffset + i].byteu.stk)
              continue;

            // Source byte i sits where register byte j will be stored: copy it out first.
            cheapMove (result, roffset + i, source, soffset + i, false);
            assigned[i] = true;
            size--;
          }
      }

  // Move everything from registers to the stack.
  for (i = 0; i < n;)
    {
      if (i < n - 1 && (aopInReg (source, soffset + i, X_IDX) || aopInReg (source, soffset + i, Y_IDX)) && aopOnStack (result, roffset + i, 2))
        {
          wassert_bt (size >= 2);

          emit2 ("ldw", aopInReg (source, soffset + i, X_IDX) ? "%s, x" : "%s, y", aopGet2 (result, roffset + i));
          cost (2, 2);
          assigned[i] = TRUE;
          assigned[i + 1] = TRUE;
          regsize -= 2;
          size -= 2;
          i += 2;
        }
      else if (aopRS (source) && !aopOnStack (source, soffset + i, 1) && aopOnStack (result, roffset + i, 1))
        {
          wassert_bt (size >= 1);

          // Store through a; swap_to_a () / swap_from_a () leave the registers as they were.
          if (!aopInReg (source, soffset + i, A_IDX))
            swap_to_a (source->aopu.bytes[soffset + i].byteu.reg->rIdx);
          emit3_o (A_LD, result, roffset + i, ASMOP_A, 0);
          if (!aopInReg (source, soffset + i, A_IDX))
            swap_from_a (source->aopu.bytes[soffset + i].byteu.reg->rIdx);
          assigned[i] = TRUE;
          regsize--;
          size--;
          i++;
        }
      else // This byte is not a register-to-stack copy.
        i++;
    }

  // Copy (stack-to-stack) what we can with whatever free regs we have.
  a_free = a_dead;
  x_free = x_dead;
  y_free = y_dead;
  for (i = 0; i < n; i++)
    {
      asmop *operand;
      int offset;

      // A register is busy if it still holds an uncopied source byte or already holds a
      // finished result byte.
      if (!assigned[i])
        {
          operand = source;
          offset = soffset + i;
        }
      else
        {
          operand = result;
          offset = roffset + i;
        }

      if (aopInReg (operand, offset, A_IDX))
        a_free = FALSE;
      else if (aopInReg (operand, offset, XL_IDX) || aopInReg (operand, offset, XH_IDX))
        x_free = FALSE;
      else if (aopInReg (operand, offset, YL_IDX) || aopInReg (operand, offset, YH_IDX))
        y_free = FALSE;
    }
  genCopyStack (result, roffset, source, soffset, n, assigned, &size, a_free, x_free, y_free, false);

  // Now do the register shuffling.

  // Try to use exgw x, y.
  if (regsize >= 3)
    {
      int ex[4] = {-2, -2, -2, -2};

      // Find XL and check that it is exchanged with YL, find XH and check that it is exchanged with YH.
      for (i = 0; i < n; i++)
        {
          if (!assigned[i] && aopInReg (result, roffset + i, XL_IDX) && aopInReg (source, soffset + i, YL_IDX))
            ex[0] = i;
          if (!assigned[i] && aopInReg (result, roffset + i, YL_IDX) && aopInReg (source, soffset + i, XL_IDX))
            ex[1] = i;
          if (!assigned[i] && aopInReg (result, roffset + i, XH_IDX) && aopInReg (source, soffset + i, YH_IDX))
            ex[2] = i;
          if (!assigned[i] && aopInReg (result, roffset + i, YH_IDX) && aopInReg (source, soffset + i, XH_IDX))
            ex[3] = i;
        }

      int exsum = (ex[0] >= 0) + (ex[1] >= 0) + (ex[2] >= 0) + (ex[3] >= 0);

      // Only a full four-byte exchange is handled here.
      if (exsum == 4)
        {
          emit2 ("exgw", "x, y");
          cost (1, 1);
          if(ex[0] >= 0)
            assigned[ex[0]] = TRUE;
          if(ex[1] >= 0)
            assigned[ex[1]] = TRUE;
          if(ex[2] >= 0)
            assigned[ex[2]] = TRUE;
          if(ex[3] >= 0)
            assigned[ex[3]] = TRUE;
          regsize -= exsum;
          size -= exsum;
        }
    }

  // Try to use rlwa x.
  // Matches the three-byte rotation a -> xl -> xh -> a.
  if (regsize >= 3)
    {
      int ex[3] = {-1, -1, -1};

      for (i = 0; i < n; i++)
        {
          if (!assigned[i] && aopInReg (result, roffset + i, XL_IDX) && aopInReg (source, soffset + i, A_IDX))
            ex[0] = i;
          if (!assigned[i] && aopInReg (result, roffset + i, XH_IDX) && aopInReg (source, soffset + i, XL_IDX))
            ex[1] = i;
          if (!assigned[i] && aopInReg (result, roffset + i, A_IDX) && aopInReg (source, soffset + i, XH_IDX))
            ex[2] = i;
        }
      if (ex[0] >= 0 && ex[1] >= 0 && ex[2] >= 0)
        {
          emit3w (A_RLWA, ASMOP_X, 0);
          assigned[ex[0]] = TRUE;
          assigned[ex[1]] = TRUE;
          assigned[ex[2]] = TRUE;
          regsize -= 3;
          size -= 3;
        }
    }

  // Try to use rrwa x.
  // Matches the opposite rotation a -> xh -> xl -> a.
  if (regsize >= 3)
    {
      int ex[3] = {-1, -1, -1};

      for (i = 0; i < n; i++)
        {
          if (!assigned[i] && aopInReg (result, roffset + i, XL_IDX) && aopInReg (source, soffset + i, XH_IDX))
            ex[0] = i;
          if (!assigned[i] && aopInReg (result, roffset + i, XH_IDX) && aopInReg (source, soffset + i, A_IDX))
            ex[1] = i;
          if (!assigned[i] && aopInReg (result, roffset + i, A_IDX) && aopInReg (source, soffset + i, XL_IDX))
            ex[2] = i;
        }
      if (ex[0] >= 0 && ex[1] >= 0 && ex[2] >= 0)
        {
          emit3w (A_RRWA, ASMOP_X, 0);
          assigned[ex[0]] = TRUE;
          assigned[ex[1]] = TRUE;
          assigned[ex[2]] = TRUE;
          regsize -= 3;
          size -= 3;
        }
    }

  // Try to use rlwa y.
  if (regsize >= 3)
    {
      int ex[3] = {-1, -1, -1};

      for (i = 0; i < n; i++)
        {
          if (!assigned[i] && aopInReg (result, roffset + i, YL_IDX) && aopInReg (source, soffset + i, A_IDX))
            ex[0] = i;
          if (!assigned[i] && aopInReg (result, roffset + i, YH_IDX) && aopInReg (source, soffset + i, YL_IDX))
            ex[1] = i;
          if (!assigned[i] && aopInReg (result, roffset + i, A_IDX) && aopInReg (source, soffset + i, YH_IDX))
            ex[2] = i;
        }
      if (ex[0] >= 0 && ex[1] >= 0 && ex[2] >= 0)
        {
          emit3w (A_RLWA, ASMOP_Y, 0);
          assigned[ex[0]] = TRUE;
          assigned[ex[1]] = TRUE;
          assigned[ex[2]] = TRUE;
          regsize -= 3;
          size -= 3;
        }
    }

  // Try to use rrwa y.
  if (regsize >= 3)
    {
      int ex[3] = {-1, -1, -1};

      for (i = 0; i < n; i++)
        {
          if (!assigned[i] && aopInReg (result, roffset + i, YL_IDX) && aopInReg (source, soffset + i, YH_IDX))
            ex[0] = i;
          if (!assigned[i] && aopInReg (result, roffset + i, YH_IDX) && aopInReg (source, soffset + i, A_IDX))
            ex[1] = i;
          if (!assigned[i] && aopInReg (result, roffset + i, A_IDX) && aopInReg (source, soffset + i, YL_IDX))
            ex[2] = i;
        }
      if (ex[0] >= 0 && ex[1] >= 0 && ex[2] >= 0)
        {
          emit3w (A_RRWA, ASMOP_Y, 0);
          assigned[ex[0]] = TRUE;
          assigned[ex[1]] = TRUE;
          assigned[ex[2]] = TRUE;
          regsize -= 3;
          size -= 3;
        }

    }

  // Try to use exg a, xl.
  // NOTE(review): this and the three following two-byte exchanges test i > 0, which
  // excludes byte 0 of the range, while the ldw x, y / ldw y, x cases further down test
  // >= 0 - confirm whether byte 0 is excluded on purpose.
  if (regsize >= 2)
    {
      int ex[2] = {-1, -1};

      i = result->regs[A_IDX] - roffset;
      if (i > 0 && i < n && !assigned[i] && aopInReg (source, soffset + i, XL_IDX))
        ex[0] = i;
      i = result->regs[XL_IDX] - roffset;
      if (i > 0 && i < n && !assigned[i] && aopInReg (source, soffset + i, A_IDX))
        ex[1] = i;

      if (ex[0] >= 0 && ex[1] >= 0)
        {
          emit2 ("exg", "a, xl");
          cost (1, 1);
          assigned[ex[0]] = TRUE;
          assigned[ex[1]] = TRUE;
          regsize -= 2;
          size -= 2;
        }
    }

  // Try to use exg a, yl.
  if (regsize >= 2)
    {
      int ex[2] = {-1, -1};

      i = result->regs[A_IDX] - roffset;
      if (i > 0 && i < n && !assigned[i] && aopInReg (source, soffset + i, YL_IDX))
        ex[0] = i;
      i = result->regs[YL_IDX] - roffset;
      if (i > 0 && i < n && !assigned[i] && aopInReg (source, soffset + i, A_IDX))
        ex[1] = i;

      if (ex[0] >= 0 && ex[1] >= 0)
        {
          emit2 ("exg", "a, yl");
          cost (1, 1);
          assigned[ex[0]] = TRUE;
          assigned[ex[1]] = TRUE;
          regsize -= 2;
          size -= 2;
        }
    }

  // Try to use swapw x.
  if (regsize >= 2)
    {
      int ex[2] = {-1, -1};

      i = result->regs[XL_IDX] - roffset;
      if (i > 0 && i < n && !assigned[i] && aopInReg (source, soffset + i, XH_IDX))
        ex[0] = i;
      i = result->regs[XH_IDX] - roffset;
      if (i > 0 && i < n && !assigned[i] && aopInReg (source, soffset + i, XL_IDX))
        ex[1] = i;

      if (ex[0] >= 0 && ex[1] >= 0)
        {
          emit2 ("swapw", "x");
          cost (1, 1);
          assigned[ex[0]] = TRUE;
          assigned[ex[1]] = TRUE;
          regsize -= 2;
          size -= 2;
        }
    }

  // Try to use swapw y.
  if (regsize >= 2)
    {
      int ex[2] = {-1, -1};

      i = result->regs[YL_IDX] - roffset;
      if (i > 0 && i < n && !assigned[i] && aopInReg (source, soffset + i, YH_IDX))
        ex[0] = i;
      i = result->regs[YH_IDX] - roffset;
      if (i > 0 && i < n && !assigned[i] && aopInReg (source, soffset + i, YL_IDX))
        ex[1] = i;

      if (ex[0] >= 0 && ex[1] >= 0)
        {
          emit2 ("swapw", "y");
          cost (2, 1);
          assigned[ex[0]] = TRUE;
          assigned[ex[1]] = TRUE;
          regsize -= 2;
          size -= 2;
        }
    }

  // Try to use ldw x, y
  // Allowed when source uses neither half of x, and either both halves of x are wanted
  // from y or the unwanted half is dead and not part of the result.
  {
    const int il = result->regs[XL_IDX] - roffset;
    const int ih = result->regs[XH_IDX] - roffset;
    const bool assign_l = (il >= 0 && il < n && !assigned[il] && aopInReg (source, soffset + il, YL_IDX));
    const bool assign_h = (ih >= 0 && ih < n && !assigned[ih] && aopInReg (source, soffset + ih, YH_IDX));
    if (source->regs[XL_IDX] < 0 && source->regs[XH_IDX] < 0 &&
      (assign_l && assign_h || assign_l && xh_dead && ih < 0 || assign_h && xl_dead && il < 0))
      {
        emit2 ("ldw", "x, y");
        cost (1, 1);
        if (assign_l)
          {
            assigned[il] = TRUE;
            regsize--;
            size--;
          }
        if (assign_h)
          {
            assigned[ih] = TRUE;
            regsize--;
            size--;
          }
      }
  }

  // Try to use ldw y, x
  {
    const int il = result->regs[YL_IDX] - roffset;
    const int ih = result->regs[YH_IDX] - roffset;
    const bool assign_l = (il >= 0 && il < n && !assigned[il] && aopInReg (source, soffset + il, XL_IDX));
    const bool assign_h = (ih >= 0 && ih < n && !assigned[ih] && aopInReg (source, soffset + ih, XH_IDX));
    if (source->regs[YL_IDX] < 0 && source->regs[YH_IDX] < 0 &&
      (assign_l && assign_h || assign_l && yh_dead && ih < 0 || assign_h && yl_dead && il < 0))
      {
        // When x is dead anyway, exgw x, y is charged cost (1, 1) against cost (2, 1) for
        // ldw y, x.
        if(x_dead && assign_l && assign_h)
          {
            emit2 ("exgw", "x, y");
            cost (1, 1);
          }
        else
          {
            emit2 ("ldw", "y, x");
            cost (2, 1);
          }
        if (assign_l)
          {
            assigned[il] = TRUE;
            regsize--;
            size--;
          }
        if (assign_h)
          {
            assigned[ih] = TRUE;
            regsize--;
            size--;
          }
      }
  }

  // Clear registers now that would be more expensive to clear later.
  if(n >= 1 && !assigned[n - 1] && sizex > n && !assigned[n] && (aopInReg (result, roffset + n - 1, X_IDX) || aopInReg (result, roffset + n - 1, Y_IDX)) && // We want to clear the high byte of x or y.
    size - regsize <= 1) // We won't need x or y for stack-to-stack copies.
    {
      const bool in_y = aopInReg (result, roffset + n - 1, Y_IDX);
      const bool yl_free = source->regs[YL_IDX] < soffset || assigned[source->regs[YL_IDX] - soffset];
      const bool yh_free = source->regs[YH_IDX] < soffset || assigned[source->regs[YH_IDX] - soffset];
      const bool xl_free = source->regs[XL_IDX] < soffset || assigned[source->regs[XL_IDX] - soffset];
      const bool xh_free = source->regs[XH_IDX] < soffset || assigned[source->regs[XH_IDX] - soffset];
      const bool y_free = yl_free && yh_free;
      const bool x_free = xl_free && xh_free;

      if (in_y ? y_free : x_free)
        {
          emit3w_o (A_CLRW, result, roffset + n - 1, 0, 0);
          assigned[n] = TRUE;
        }
    }

  // Remaining register-to-register bytes, one at a time, in an order that never
  // overwrites a register still needed as a source.
  while (regsize)
    {
      // Find lowest byte that can be assigned and needs to be assigned.
      for (i = 0; i < n; i++)
        {
          int j;

          if (assigned[i] || !source->aopu.bytes[soffset + i].in_reg)
            continue;

          for (j = 0; j < n; j++)
            {
              if (!source->aopu.bytes[soffset + j].in_reg || !result->aopu.bytes[roffset + i].in_reg)
                continue;
              if (!assigned[j] && i != j && result->aopu.bytes[roffset + i].byteu.reg == source->aopu.bytes[soffset + j].byteu.reg)
                goto skip_byte; // We can't write this one without overwriting the source.
            }

          break;                // Found byte that can be written safely.

skip_byte:
          ;
        }

      if (i < n)
        {
          cheapMove (result, roffset + i, source, soffset + i, TRUE);       // We can safely assign a byte.
          regsize--;
          size--;
          assigned[i] = TRUE;
          continue;
        }

      // No byte can be assigned safely (i.e. the assignment is a permutation).
      if (!regalloc_dry_run)
        wassertl_bt (0, "Unimplemented.");
      cost (180, 180);
      return;
    }

  // Copy (stack-to-stack) what we can with whatever free regs we have now.
  a_free = a_dead;
  x_free = x_dead;
  y_free = y_dead;
  for (i = 0; i < n; i++)
    {
      // Only registers that already hold a finished result byte are busy now.
      if (!assigned[i])
        continue;
      if (aopInReg (result, roffset + i, A_IDX))
        a_free = FALSE;
      else if (aopInReg (result, roffset + i, XL_IDX) || aopInReg (result, roffset + i, XH_IDX))
        x_free = FALSE;
      else if (aopInReg (result, roffset + i, YL_IDX) || aopInReg (result, roffset + i, YH_IDX))
        y_free = FALSE;
    }
  genCopyStack (result, roffset, source, soffset, n, assigned, &size, a_free, x_free, y_free, FALSE);

  // Last, move everything from stack to registers.
  for (i = 0; i < n;)
    {
      if (i < n - 1 && (aopInReg (result, roffset + i, X_IDX) || aopInReg (result, roffset + i, Y_IDX)) && aopOnStackNotExt (source, soffset + i, 2))
        {
          wassert_bt (size >= 2);
          emit2 ("ldw", aopInReg (result, roffset + i, X_IDX) ? "x, %s" : "y, %s", aopGet2 (source, soffset + i));
          cost (2, 2);
          assigned[i] = TRUE;
          assigned[i + 1] = TRUE;
          if (aopInReg (result, roffset + i, X_IDX))
            x_free = FALSE;
          size -= 2;
          i += 2;
        }
      else if (i < n - 1 && aopInReg (result, roffset + i, X_IDX) && aopOnStack (source, soffset + i, 2))
        {
          // On the stack, but not reachable by the previous branch: address it through a
          // copy of y plus an explicit offset.
          // NOTE(review): unlike the neighbouring branches, this one does not set
          // assigned[i] / assigned[i + 1] - confirm.
          long int eoffset = (long int)(source->aopu.bytes[soffset + i + 1].byteu.stk) + G.stack.size - 256l;
          wassertl_bt (regalloc_dry_run || stm8_extend_stack, "Extended stack access, but y not prepared for extended stack access.");
          wassertl_bt (regalloc_dry_run || eoffset >= 0l && eoffset <= 0xffffl, "Stack access out of extended stack range."); // Stack > 64K.

          emit2 ("ldw", "x, y");
          cost (1, 1);
          emit2 ("ldw", "x, (0x%x, x)", (unsigned)eoffset);
          cost (2 + (eoffset > 255), 2);
          x_free = FALSE;
          size -= 2;
          i += 2;
        }
      // todo: Try to use ldw to load xl, xh, yl, yh when the other half is not in use.
      else if (aopRS (result) && !aopOnStack (result, roffset + i, 1) && aopOnStack (source, soffset + i, 1))
        {
          wassert_bt (size >= 1);
          cheapMove (result, roffset + i, source, soffset + i, !a_free);
          assigned[i] = TRUE;
          if (aopInReg (result, roffset + i, A_IDX))
            a_free = FALSE;
          if (aopInReg (result, roffset + i, XL_IDX) || aopInReg (result, roffset + i, XH_IDX))
            x_free = FALSE;
          size--;
          i++;
        }
      else // This byte is not a stack-to-register copy.
        i++;
    }

  // Free a reg to copy (stack-to-stack) whatever is left.
  if (size)
    {
      a_free = a_dead && (result->regs[A_IDX] < 0 || result->regs[A_IDX] >= roffset + source->size);
      if (!a_free)
        push (ASMOP_A, 0, 1);
      genCopyStack (result, roffset, source, soffset, n, assigned, &size, TRUE, x_free, y_free, TRUE);
      if (!a_free)
        pop (ASMOP_A, 0, 1);
    }

  wassertl_bt (size >= 0, "genCopy() copied more than there is to be copied.");

  a_free = a_dead && (result->regs[A_IDX] < 0 || result->regs[A_IDX] >= roffset + source->size);

  // Place leading zeroes.
  for (i = source->size - soffset; i < sizex;)
    {
      if (assigned[i])
        {
          i++;
          continue;
        }
      else if (i + 1 < sizex && !assigned[i + 1] && (aopInReg (result, roffset + i, X_IDX) || aopInReg (result, roffset + i, Y_IDX)))
        {
          if (aopInReg (result, roffset + i, X_IDX))
            emit3w (A_CLRW, ASMOP_X, 0);
          else if (aopInReg (result, roffset + i, Y_IDX))
            emit3w (A_CLRW, ASMOP_Y, 0);
          i += 2;
        }
      else if (y_dead && aopIsLitVal (source, soffset + i + 1, 1, 0x00) &&
        (aopInReg (result, roffset + i, YL_IDX) && result->regs[YH_IDX] < 0 || aopInReg (result, roffset + i, YH_IDX) && result->regs[YL_IDX] < 0))
        {
          emit3w (A_CLRW, ASMOP_Y, 0);
          i++;
        }
      else
        {
          cheapMove (result, roffset + i, ASMOP_ZERO, 0, !a_free);
          assigned[i] = TRUE;
          if (aopInReg (result, roffset + i, A_IDX))
            a_free = FALSE;
          i++;
        }
    }

  // Anything still uncopied at this point is an internal error; report per-byte state.
  if (size)
    {
      if (!regalloc_dry_run)
        {
          wassertl_bt (0, "genCopy failed to completely copy operands.");
          fprintf (stderr, "%d bytes left.\n", size);
          fprintf (stderr, "left type %d source type %d\n", result->type, source->type);
          for (i = 0; i < n ; i++)
            fprintf (stderr, "Byte %d, result in reg %d, source in reg %d. %s assigned.\n", i, result->aopu.bytes[roffset + i].in_reg ? result->aopu.bytes[roffset + i].byteu.reg->rIdx : -1, source->aopu.bytes[soffset + i].in_reg ? source->aopu.bytes[soffset + i].byteu.reg->rIdx : -1, assigned[i] ? "" : "not");
        }
      cost (180, 180);
    }
}

/*-----------------------------------------------------------------*/
/* genMove_o - Copy part of one asmop to another                   */
/*                                                                 */
/* result, roffset - destination operand and first byte; must not  */
/*                   be a literal or immediate                     */
/* source, soffset - source operand and first byte                 */
/* size            - number of bytes to write                      */
/* a_dead_global,                                                  */
/* x_dead_global,                                                  */
/* y_dead_global   - register contents not needed afterwards       */
/*                                                                 */
/* Register / stack to register / stack moves are delegated to     */
/* genCopy (); everything else is handled here, 16 bits at a time  */
/* where a cheap word instruction exists, else by cheapMove ().    */
/*-----------------------------------------------------------------*/
static void
genMove_o (asmop *result, int roffset, asmop *source, int soffset, int size, bool a_dead_global, bool x_dead_global, bool y_dead_global)
{
  int i;

  // Set while x / y is known to have been cleared by this function, so that a second
  // clrw can be skipped.
  bool clr_x = FALSE, clr_y = FALSE;

  wassertl_bt (result->type != AOP_LIT, "Trying to write to literal.");
  wassertl_bt (result->type != AOP_IMMD, "Trying to write to immediate.");
  wassertl_bt (roffset + size <= result->size, "Trying to write beyond end of operand");

#if 0
  D (emit2("; genMove_o", "offset %d %d, size %d, deadness %d %d %d", roffset, soffset, size, a_dead_global, x_dead_global, y_dead_global));
#endif

  if (aopRS (result) && aopRS (source))
    {
      genCopy (result, roffset, source, soffset, size, a_dead_global, x_dead_global, y_dead_global);
      return;
    }

  // Same direct-addressed object at the same offset: nothing to do.
  if (result->type == AOP_DIR && source->type == AOP_DIR && roffset == soffset && !strcmp(result->aopu.aop_dir, source->aopu.aop_dir))
    return;

  for (i = 0; i < size;)
    {
      // Per-iteration liveness, recomputed from the global flags and the register
      // positions recorded in result->regs[] and source->regs[].
      const bool x_dead = x_dead_global &&
        (!aopRS (result) || (result->regs[XL_IDX] >= (roffset + i) || result->regs[XL_IDX] < 0) && (result->regs[XH_IDX] >= (roffset + i) || result->regs[XH_IDX] < 0)) &&
        (!aopRS (source) || source->regs[XL_IDX] <= i + 1 && source->regs[XH_IDX] <= i + 1);
      const bool y_dead = y_dead_global &&
        (!aopRS (result) || (result->regs[YL_IDX] >= (roffset + i) || result->regs[YL_IDX] < 0) && (result->regs[YH_IDX] >= (roffset + i) || result->regs[YH_IDX] < 0)) &&
        (!aopRS (source) || source->regs[YL_IDX] <= i + 1 && source->regs[YH_IDX] <= i + 1);
      const bool a_dead = a_dead_global &&
        (!aopRS (result) || (result->regs[A_IDX] >= (roffset + i) || result->regs[A_IDX] < 0)) &&
        (!aopRS (source) || source->regs[A_IDX] <= i);

      // x or y := 0x0000
      if (i + 1 < size && (aopInReg (result, roffset + i, X_IDX) || aopInReg (result, roffset + i, Y_IDX)) && aopIsLitVal (source, soffset + i, 2, 0x0000))
        {
          if (aopInReg (result, roffset + i, X_IDX) && !clr_x)
            {
              emit3w (A_CLRW, ASMOP_X, 0);
              clr_x = TRUE;
            }
          else if (aopInReg (result, roffset + i, Y_IDX) && !clr_y)
            {
              emit3w (A_CLRW, ASMOP_Y, 0);
              clr_y = TRUE;
            }
          i += 2;
        }
      // The literal word repeats the previous one, which went into the other 16-bit
      // register: copy register to register.
      else if (i + 1 < size && i >= 2 && source->type == AOP_LIT && aopIsLitVal (source, soffset + i, 2, byteOfVal (source->aopu.aop_lit, soffset + i - 2) + byteOfVal (source->aopu.aop_lit, soffset + i - 1) * 256) &&
        (aopInReg (result, roffset + i, X_IDX) && aopInReg (result, roffset + i - 2, Y_IDX) || aopInReg (result, roffset + i, Y_IDX) && aopInReg (result, roffset + i - 2, X_IDX)))
        {
          emit2 ("ldw", "%s, %s", aopGet2 (result, roffset + i), aopGet2 (result, roffset + i - 2));
          cost (1 + aopInReg (result, roffset + i, Y_IDX), 1);
          i += 2;
        }
      // x := 0x0001 or 0xffff via clrw followed by incw / decw.
      else if (i + 1 < size && aopInReg (result, roffset + i, X_IDX) && (aopIsLitVal (source, soffset + i, 2, 0x0001) || aopIsLitVal (source, soffset + i, 2, 0xffff)))
        {
          bool dec = aopIsLitVal (source, soffset + i, 2, 0xffff);
          emit3w (A_CLRW, ASMOP_X, 0);
          emit3w (dec ? A_DECW : A_INCW, ASMOP_X, 0);
          i += 2;
        }
      // x := literal / direct / immediate word
      else if (i + 1 < size && aopInReg (result, roffset + i, X_IDX) &&
        (source->type == AOP_LIT || source->type == AOP_DIR && soffset + i + 1 < source->size || source->type == AOP_IMMD))
        {
          emit2 ("ldw", "x, %s", aopGet2 (source, soffset + i));
          cost (3, 2);
          clr_x = FALSE;
          i += 2;
        }
      // y := literal / direct / immediate word
      else if (i + 1 < size && aopInReg (result, roffset + i, Y_IDX) &&
        (source->type == AOP_LIT || source->type == AOP_DIR && soffset + i + 1 < source->size || source->type == AOP_IMMD))
        {
          emit2 ("ldw", "y, %s", aopGet2 (source, soffset + i));
          cost (4, 2);
          clr_y = FALSE;
          i += 2;
        }
      // direct word := x
      else if (i + 1 < size && result->type == AOP_DIR && aopInReg (source, soffset + i, X_IDX))
        {
          emit2 ("ldw", "%s, x", aopGet2 (result, roffset + i));
          cost (3, 2);
          i += 2;
        }
      // direct word := y
      else if (i + 1 < size && result->type == AOP_DIR && aopInReg (source, soffset + i, Y_IDX))
        {
          emit2 ("ldw", "%s, y", aopGet2 (result, roffset + i));
          cost (4, 2);
          i += 2;
        }
      // memory word := memory / literal word, using a dead x as temporary
      else if (x_dead && i + 1 < size &&
        (aopOnStack (result, roffset + i, 2) || result->type == AOP_DIR) &&
        (aopOnStackNotExt (source, soffset + i, 2) || source->type == AOP_LIT || source->type == AOP_DIR && soffset + i + 1 < source->size || source->type == AOP_IMMD))
        {
          if (aopIsLitVal (source, soffset + i, 2, 0x0000))
            {
              if (!clr_x)
                emit3w (A_CLRW, ASMOP_X, 0);
              clr_x = TRUE;
            }
          else
            {
              emit2 ("ldw", "x, %s", aopGet2 (source, soffset + i));
              cost (3, 2);
              clr_x = FALSE;
            }
          emit2 ("ldw", "%s, x", aopGet2 (result, roffset + i));
          cost (2, 2);
          i += 2;
        }
      // x or y := word whose upper byte is zero: clrw, then move the low byte only
      else if (i + 1 < size && aopIsLitVal (source, soffset + i + 1, 1, 0x00) && (aopInReg (result, roffset + i, X_IDX) || aopInReg (result, roffset + i, Y_IDX)))
        {
          emit3w_o (A_CLRW, result, roffset + i, 0, 0);
          cheapMove (result, roffset + i, source, soffset + i, !a_dead);
          i += 2;
        }
      // single zero byte into memory, stack or a
      else if ((!aopRS (result) || aopOnStack(result, roffset + i, 1) || aopInReg (result, roffset + i, A_IDX)) && aopIsLitVal (source, soffset + i, 1, 0x00))
        {
          emit3_o (A_CLR, result, roffset + i, 0, 0);
          i++;
        }
      // stack word := literal / direct / immediate word, using a dead y as temporary
      // NOTE(review): unlike the other two-byte branches this one has no explicit
      // i + 1 < size guard - confirm that aopOnStack (..., 2) makes it unnecessary.
      else if (y_dead && aopOnStack (result, roffset + i, 2) &&
        (source->type == AOP_LIT || source->type == AOP_DIR && soffset + i + 1 < source->size || source->type == AOP_IMMD))
        {
          if (aopIsLitVal (source, soffset + i, 2, 0x0000))
            {
              if (!clr_y)
                emit3w (A_CLRW, ASMOP_Y, 0);
              clr_y = TRUE;
            }
          else
            {
              emit2 ("ldw", "y, %s", aopGet2 (source, soffset + i));
              cost (4, 2);
              clr_y = FALSE;
            }
          emit2 ("ldw", "%s, y", aopGet2 (result, roffset + i));
          cost (2, 2);
          i += 2;
        }
      // NOTE(review): this condition requires the source word to be on the stack and the
      // source type to be AOP_DIR at the same time - looks unreachable; confirm intent.
      else if (y_dead && i + 1 < size && aopOnStack (source, soffset + i, 2) && source->type == AOP_DIR)
        {
          emit2 ("ldw", "y, %s", aopGet2 (source, soffset + i));
          emit2 ("ldw", "%s, y", aopGet2 (result, roffset + i));
          cost (6, 4);
          clr_y = FALSE;
          i += 2;
        }
      // Fallback: one byte through cheapMove ().
      else
        {
          cheapMove (result, roffset + i, source, soffset + i, !a_dead);
          i++;
        }
    }
}

/*-----------------------------------------------------------------*/
/* genMove - Copy the value from one asmop to another              */
/*                                                                 */
/* Whole-operand wrapper around genMove_o (): both offsets are 0   */
/* and result->size bytes are written.                             */
/*-----------------------------------------------------------------*/
static void
genMove (asmop *result, asmop *source, bool a_dead, bool x_dead, bool y_dead)
{
  genMove_o (result, 0, source, 0, result->size, a_dead, x_dead, y_dead);
}

/*---------------------------------------------------------------------*/
/* stm8_emitDebuggerSymbol - associate the current code location       */
/*                           with a debugger symbol                    */
/*                                                                     */
/* Emits "<debugSym> ==." with G.debugLine set for the duration of the */
/* emit2 () call.                                                      */
/*---------------------------------------------------------------------*/
void
stm8_emitDebuggerSymbol (const char *debugSym)
{
  G.debugLine = 1;
  emit2 ("", "%s ==.", debugSym);
  G.debugLine = 0;
}

/*-----------------------------------------------------------------*/
/* isLiteralBit - test if lit == 2^n                               */
/*                                                                 */
/* Returns n (0 .. 31) if lit is exactly 2^n, -1 otherwise.        */
/*-----------------------------------------------------------------*/
static int
isLiteralBit (unsigned long lit)
{
  // Table of all 32 single-bit values; its index is the bit number.
  unsigned long pw[32] =
    {
      1L, 2L, 4L, 8L, 16L, 32L, 64L,
128L, + 0x100L, 0x200L, 0x400L, 0x800L, + 0x1000L, 0x2000L, 0x4000L, 0x8000L, + 0x10000L, 0x20000L, 0x40000L, 0x80000L, + 0x100000L, 0x200000L, 0x400000L, 0x800000L, + 0x1000000L, 0x2000000L, 0x4000000L, 0x8000000L, + 0x10000000L, 0x20000000L, 0x40000000L, 0x80000000L + }; + int idx; + + for (idx = 0; idx < 32; idx++) + if (lit == pw[idx]) + return idx; + return -1; +} + +/*-----------------------------------------------------------------*/ +/* genNot - generates code for ! */ +/*-----------------------------------------------------------------*/ +static void +genNot (const iCode *ic) +{ + operand *result = IC_RESULT (ic); + operand *left = IC_LEFT (ic); + int i; + int pushed_a = false; + + D (emit2 ("; genNot", "")); + + aopOp (left, ic); + aopOp (result, ic); + + for (i = 1; i < left->aop->size; i++) + if (aopInReg (left->aop, i, A_IDX)) + { + push (ASMOP_A, 0, 1); + pushed_a = true; + break; + } + + if (!regDead (A_IDX, ic) && !pushed_a) + { + push (ASMOP_A, 0, 1); + pushed_a = true; + } + for (i = 0; i < left->aop->size;) + { + if (i == 0 && !IS_FLOAT (operandType (left)) && + (aopInReg (left->aop, i, X_IDX) || aopInReg (left->aop, i, Y_IDX) && regDead (Y_IDX, ic))) + { + if (aopInReg (left->aop, i, Y_IDX)) + { + emit2 ("subw", "y, #0x0001"); + cost (4, 2); + } + else + { + emit2 ("cpw", "x, #0x0001"); + cost (3, 2); + } + i += 2; + } + else if (i == 0 && i + 1 < left->aop->size && !IS_FLOAT (operandType (left)) && regDead (X_IDX, ic) && + (aopOnStack (left->aop, i, 2) && left->aop->regs[XL_IDX] < 0 && left->aop->regs[XH_IDX] < 0 || left->aop->type == AOP_DIR)) + { + genMove_o (ASMOP_X, 0, left->aop, i, 2, true, true, false); + emit2 ("subw", "x, #0x0001"); + cost (3, 2); + i += 2; + } + else + { + if (i && aopInReg (left->aop, i, A_IDX)) + { + emit2 ("ld", "a, (1, sp)"); + cost (2, 1); + } + else + cheapMove (ASMOP_A, 0, left->aop, i, false); + if (IS_FLOAT (operandType (left)) && i == left->aop->size - 1) + { + emit2 ("and", "a, #0x7f"); + cost (2, 1); + } + 
if (!i) + emit3 (A_SUB, ASMOP_A, ASMOP_ONE); + else + emit3 (A_SBC, ASMOP_A, ASMOP_ZERO); + i++; + } + } + + if (result->aop->size == 2 && (aopInReg (result->aop, i, X_IDX) || aopInReg (result->aop, i, Y_IDX))) + { + emit3 (A_CLRW, result->aop, 0); + emit3 (A_RLCW, result->aop, 0); + } + else + { + emit3 (A_CLR, ASMOP_A, 0); + emit3 (A_RLC, ASMOP_A, 0); + + cheapMove (result->aop, 0, ASMOP_A, 0, false); + + for (i = 1; i < result->aop->size; i++) + cheapMove (result->aop, 0, ASMOP_ZERO, 0, true); + } + + if (pushed_a) + if (!regDead (A_IDX, ic) || result->aop->regs[A_IDX] < 0) + pop (ASMOP_A, 0, 1); + else + adjustStack (1, false, false, false); + + freeAsmop (left); + freeAsmop (result); +} + +/*-----------------------------------------------------------------*/ +/* genCpl - generate code for complement */ +/*-----------------------------------------------------------------*/ +static void +genCpl (const iCode *ic) +{ + operand *result = IC_RESULT (ic); + operand *left = IC_LEFT (ic); + int left_in_a = 0; + bool result_in_a = FALSE; + bool destroyed_a = FALSE; + bool pushed_a = FALSE; + bool result_pushed = FALSE; + int i, size; + + D (emit2 ("; genCpl", "")); + + aopOp (left, ic); + aopOp (result, ic); + + size = result->aop->size; + + for (i = 1; i < left->aop->size; i++) + if (aopInReg (left->aop, i, A_IDX)) + { + left_in_a = i; + break; + } + + for (i = 0; i < size;) + { + // todo: Complement in source where dead and more efficient. 
+ if (aopInReg (result->aop, i, X_IDX) || aopInReg (result->aop, i, Y_IDX)) + { + const bool x_free = regDead (X_IDX, ic) && + left->aop->regs[XL_IDX] < i && left->aop->regs[XH_IDX] < i && + (result->aop->regs[XL_IDX] < 0 || result->aop->regs[XL_IDX] >= i) && (result->aop->regs[XH_IDX] < 0 || result->aop->regs[XH_IDX] >= i); + const bool y_free = regDead (Y_IDX, ic) && + left->aop->regs[YL_IDX] < i && left->aop->regs[YH_IDX] < i && + (result->aop->regs[YL_IDX] < 0 || result->aop->regs[YL_IDX] >= i) && (result->aop->regs[YH_IDX] < 0 || result->aop->regs[YH_IDX] >= i); + genMove_o (result->aop, i, left->aop, i, 2, (regDead (A_IDX, ic) || pushed_a) && !result_in_a && !(left_in_a > i), x_free, y_free); + + emit3w_o (A_CPLW, result->aop, i, 0, 0); + + i += 2; + } + else if ((aopOnStack (result->aop, i, 1) || result->aop->type == AOP_DIR) && aopSame (result->aop, i, left->aop, i, 1)) + { + emit3_o (A_CPL, result->aop, i, 0, 0); + i++; + } + else + { + bool pushed_left = destroyed_a && aopInReg (left->aop, i, A_IDX); + + if ((left_in_a > i || !regDead (A_IDX, ic) || result_in_a) && !pushed_a) + { + push (ASMOP_A, 0, 1); + pushed_a = TRUE; + if (result_in_a) + { + result_in_a = FALSE; + result_pushed = TRUE; + } + } + + if (pushed_left && !regDead (A_IDX, ic)) + { + pop (ASMOP_A, 0, 1); + pushed_a = FALSE; + } + else if (pushed_left) + { + emit2 ("ld", "a, (1, sp)"); + cost (2, 1); + } + else + cheapMove (ASMOP_A, 0, left->aop, i, FALSE); + + destroyed_a = TRUE; + + emit3 (A_CPL, ASMOP_A, 0); + + cheapMove (result->aop, i, ASMOP_A, 0, FALSE); + + if (aopInReg (result->aop, i, A_IDX)) + result_in_a = TRUE; + + i++; + } + } + + if (pushed_a && !regDead (A_IDX, ic) || result_pushed) + pop (ASMOP_A, 0, 1); + else if (pushed_a) + adjustStack (1, FALSE, FALSE, FALSE); + + freeAsmop (left); + freeAsmop (result); +} + +/*-----------------------------------------------------------------*/ +/* genSub - generates code for subtraction */ 
/*-----------------------------------------------------------------*/
/* Emits code for result_aop = left_aop - right_aop, walking from offset 0
   upward in steps of one or two bytes. Each branch of the loop below is a
   special case for a particular operand placement; the final else is the
   generic byte-wise path through a. 'started' records that a subtraction
   has been emitted; from then on the byte-wise paths use sbc instead of sub. */
static void
genSub (const iCode *ic, asmop *result_aop, asmop *left_aop, asmop *right_aop)
{
  int size, i, j;
  bool started;
  bool pushed_a = FALSE;        /* a copy of a is on top of the stack */
  bool result_in_a = FALSE;     /* a holds an already computed result byte */

  size = result_aop->size;

  for (i = 0, started = FALSE; i < size;)
    {
      /* Which registers may be used as scratch at this offset. */
      bool a_free = regDead (A_IDX, ic) && left_aop->regs[A_IDX] <= i && right_aop->regs[A_IDX] <= i && !result_in_a || pushed_a;
      bool xl_free = regDead (XL_IDX, ic) && (result_aop->regs[XL_IDX] >= i || result_aop->regs[XL_IDX] < 0) && left_aop->regs[XL_IDX] <= i + 1 && right_aop->regs[XL_IDX] < i;
      bool xh_free = regDead (XH_IDX, ic) && (result_aop->regs[XH_IDX] >= i || result_aop->regs[XH_IDX] < 0) && left_aop->regs[XH_IDX] <= i + 1 && right_aop->regs[XH_IDX] < i;
      bool x_free = xl_free && xh_free;
      bool yl_free = regDead (YL_IDX, ic) && (result_aop->regs[YL_IDX] >= i || result_aop->regs[YL_IDX] < 0) && left_aop->regs[YL_IDX] <= i + 1 && right_aop->regs[YL_IDX] < i;
      bool yh_free = regDead (YH_IDX, ic) && (result_aop->regs[YH_IDX] >= i || result_aop->regs[YH_IDX] < 0) && left_aop->regs[YH_IDX] <= i + 1 && right_aop->regs[YH_IDX] < i;
      bool y_free = yl_free && yh_free;

      /* 0 - right with the result in x / y (or in xl / yl for the top byte): negw. */
      if (!started && left_aop->type == AOP_LIT && !byteOfVal (left_aop->aopu.aop_lit, i) &&
        (!byteOfVal (left_aop->aopu.aop_lit, i + 1) && (aopInReg (result_aop, i, X_IDX) || aopInReg (result_aop, i, Y_IDX)) ||
        !started && i == size - 1 && (aopInReg (result_aop, i, XL_IDX) && regDead (XH_IDX, ic) && right_aop->regs[XH_IDX] < 0 && result_aop->regs[XH_IDX] < 0 || aopInReg (result_aop, i, YL_IDX) && regDead (YH_IDX, ic) && right_aop->regs[YH_IDX] < 0 && result_aop->regs[YH_IDX] < 0)))
        {
          bool half = (i == size - 1);
          bool x = aopInReg (result_aop, i, half ? XL_IDX : X_IDX);
          genMove_o (x ? ASMOP_X : ASMOP_Y, 0, right_aop, i, 2 - half, a_free, x, !x);
          emit3w (A_NEGW, x ? ASMOP_X : ASMOP_Y, 0);
          started = TRUE;
          i += 2;
        }
      /* Negate a word in place on the stack. */
      else if (!started &&
        aopOnStack (result_aop, i, 2) && aopOnStack (right_aop, i, 2) && result_aop->aopu.bytes[i].byteu.stk == right_aop->aopu.bytes[i].byteu.stk && result_aop->aopu.bytes[i + 1].byteu.stk == right_aop->aopu.bytes[i + 1].byteu.stk &&
        aopIsLitVal (right_aop, i, 2, 0x0000))
        {
          emit3w_o (A_NEGW, result_aop, i, 0, 0);
          started = TRUE;
          i += 2;
        }
      // We can use incw / decw only for the only, top non-zero word, since it neither takes into account an existing carry nor does it update the carry.
      else if (!started && i == size - 2 &&
        (aopInReg (result_aop, i, X_IDX) || aopInReg (result_aop, i, Y_IDX)) &&
        right_aop->type == AOP_LIT && !byteOfVal (right_aop->aopu.aop_lit, i + 1) &&
        byteOfVal (right_aop->aopu.aop_lit, i) <= 1 + aopInReg (result_aop, i, X_IDX) ||
        !started && i == size - 1 &&
        !(aopInReg (left_aop, i, A_IDX) && regDead (A_IDX, ic)) &&
        (aopInReg (result_aop, i, XL_IDX) && regDead (XH_IDX, ic) && left_aop->regs[XH_IDX] < 0 && result_aop->regs[XH_IDX] < 0 || aopInReg (result_aop, i, YL_IDX) && regDead (YH_IDX, ic) && left_aop->regs[YH_IDX] < 0 && result_aop->regs[YH_IDX] < 0) &&
        right_aop->type == AOP_LIT && byteOfVal (right_aop->aopu.aop_lit, i) <= 1 + aopInReg (result_aop, i, XL_IDX))
        {
          bool half = (i == size - 1);
          bool x = aopInReg (result_aop, i, half ? XL_IDX : X_IDX);
          genMove_o (x ? ASMOP_X : ASMOP_Y, 0, left_aop, i, 2 - half, a_free, x, !x);
          for (j = 0; j < byteOfVal (right_aop->aopu.aop_lit, i); j++)
            emit3w (A_DECW, x ? ASMOP_X : ASMOP_Y, 0);
          cost (x ? 1 : 2, 1);
          started = TRUE;
          i += 2;
        }
      // In some cases we gain so much by using decw that it is worth handling the carry explicitly.
      else if (started && i == size - 2 && (aopInReg (result_aop, i, X_IDX) || aopInReg (result_aop, i, Y_IDX)) && aopIsLitVal (left_aop, i, 2, 0x0000) &&
        (aopOnStack (right_aop, i, 2) || right_aop->type == AOP_DIR))
        {
          bool x = aopInReg (result_aop, i, X_IDX);
          genMove_o (x ? ASMOP_X : ASMOP_Y, 0, right_aop, i, 2, a_free, x_free, y_free);
          symbol *tlbl = (regalloc_dry_run ? 0 : newiTempLabel (NULL));
          if (!regalloc_dry_run)
            emit2 ("jrnc", "!tlabel", labelKey2num (tlbl->key));
          cost (2, 1);
          emit3w_o (A_INCW, result_aop, i, 0, 0);
          emitLabel (tlbl);
          emit3w_o (A_NEGW, result_aop, i, 0, 0);
          i += 2;
        }
      /* Result word in x / y and right directly usable as a subw operand. */
      else if (!started &&
        (aopInReg (result_aop, i, X_IDX) || aopInReg (result_aop, i, Y_IDX)) &&
        (right_aop->type == AOP_LIT || right_aop->type == AOP_IMMD || aopOnStackNotExt (right_aop, i, 2) || right_aop->type == AOP_DIR && i + 1 < right_aop->size))
        {
          bool x = aopInReg (result_aop, i, X_IDX);
          genMove_o (x ? ASMOP_X : ASMOP_Y, 0, left_aop, i, 2, a_free, x, !x);
          if (!aopIsLitVal (right_aop, i, 2, 0x0000))
            {
              emit2 ("subw", x ? "x, %s" : "y, %s", aopGet2 (right_aop, i));
              cost ((x || aopOnStack (right_aop, 0, 2)) ? 3 : 4, 2);
              started = TRUE;
            }
          i += 2;
        }
      /* left - x with the result in x: negate x, then add left. */
      else if (!started && i == size - 2 && aopInReg (right_aop, i, X_IDX) && aopInReg (result_aop, i, X_IDX) &&
        (left_aop->type == AOP_DIR || aopOnStackNotExt (left_aop, i, 2)))
        {
          emit3w (A_NEGW, ASMOP_X, 0);
          emit2 ("addw", "x, %s", aopGet2 (left_aop, i));
          cost (4, 2);
          started = TRUE;
          i += 2;
        }
      /* Negate a byte in place in memory. */
      else if (!started && aopIsLitVal (left_aop, i, 1, 0x00) &&
        (aopOnStack (result_aop, i, 1) || result_aop->type == AOP_DIR) && aopSame (result_aop, i, right_aop, i, 1))
        {
          emit3_o (A_NEG, result_aop, i, 0, 0);
          started = TRUE;
          i++;
        }
      /* Top byte in memory, small literal: repeated dec in place. */
      else if (!started && i == size - 1 &&
        (aopOnStack (result_aop, i, 1) || result_aop->type == AOP_DIR) && aopSame (result_aop, i, left_aop, i, 1) &&
        right_aop->type == AOP_LIT && byteOfVal (right_aop->aopu.aop_lit, i) <= 2 + !a_free)
        {
          for (j = 0; j < byteOfVal (right_aop->aopu.aop_lit, i); j++)
            emit3_o (A_DEC, result_aop, i, 0, 0);
          i++;
        }
      /* Top byte in memory, literal close to 256: repeated inc in place. */
      else if (!started && i == size - 1 &&
        (aopOnStack (result_aop, i, 1) || result_aop->type == AOP_DIR) && aopSame (result_aop, i, left_aop, i, 1) &&
        right_aop->type == AOP_LIT && byteOfVal (right_aop->aopu.aop_lit, i) >= 254 - !a_free)
        {
          for (j = byteOfVal (right_aop->aopu.aop_lit, i); j < 256; j++)
            emit3_o (A_INC, result_aop, i, 0, 0);
          i++;
        }
      /* Word-wise subtraction through x. */
      else if (!started && i + 1 < size && (x_free || aopInReg(left_aop, i, X_IDX) && regDead (X_IDX, ic)) &&
        (aopOnStackNotExt (right_aop, i, 2) || right_aop->type == AOP_LIT || right_aop->type == AOP_IMMD || right_aop->type == AOP_DIR && i + 1 < right_aop->size) &&
        ((aopOnStack (result_aop, i, 2) || result_aop->type == AOP_DIR) && (aopRS (left_aop) && !aopInReg(left_aop, i, A_IDX) && !aopInReg(left_aop, i + 1, A_IDX) || left_aop->type == AOP_DIR) ||
        aopInReg(left_aop, i, X_IDX) && aopInReg(result_aop, i, Y_IDX) ||
        aopInReg(left_aop, i, X_IDX) && (result_aop->regs[XL_IDX] < 0 || result_aop->regs[XL_IDX] >= i) && (result_aop->regs[XH_IDX] < 0 || result_aop->regs[XH_IDX] >= i) && (aopInReg(left_aop, i, XL_IDX) || aopInReg(left_aop, i + 1, XH_IDX) || aopInReg(left_aop, i, XH_IDX) && aopInReg(left_aop, i + 1, XL_IDX))))
        {
          genMove_o (ASMOP_X, 0, left_aop, i, 2, a_free, TRUE, FALSE);
          if (i == size - 2 && right_aop->type == AOP_LIT && byteOfVal (right_aop->aopu.aop_lit, i) <= 2 && !byteOfVal (right_aop->aopu.aop_lit, i + 1))
            for (j = 0; j < byteOfVal (right_aop->aopu.aop_lit, i); j++)
              emit3w (A_DECW, ASMOP_X, 0);
          else
            {
              emit2 ("subw", "x, %s", aopGet2 (right_aop, i));
              cost (3 + (right_aop->type == AOP_DIR), 2);
            }
          genMove_o (result_aop, i, ASMOP_X, 0, 2, a_free, TRUE, FALSE);
          if (aopInReg (result_aop, i, A_IDX) || aopInReg (result_aop, i + 1, A_IDX))
            result_in_a = TRUE;
          started = TRUE;
          i += 2;
        }
      /* 0 - right for a single byte: neg through a. */
      else if (!started && aopIsLitVal (left_aop, i, 1, 0x00))
        {
          if (!a_free)
            {
              push (ASMOP_A, 0, 1);
              pushed_a = TRUE;
              result_in_a = FALSE;
            }

          cheapMove (ASMOP_A, 0, right_aop, i, FALSE);
          emit3 (A_NEG, ASMOP_A, 0);
          cheapMove (result_aop, i, ASMOP_A, 0, FALSE);

          started = TRUE;

          if (aopInReg (result_aop, i, A_IDX))
            result_in_a = TRUE;

          i++;
        }
      /* Word-wise subtraction through y. */
      else if (!started && i + 1 < size && (y_free || aopInReg(left_aop, i, Y_IDX) && regDead (Y_IDX, ic)) &&
        (aopOnStack (result_aop, i, 2) || result_aop->type == AOP_DIR) &&
        (aopOnStack (left_aop, i, 2) || aopInReg(left_aop, i, Y_IDX) || left_aop->type == AOP_DIR) &&
        (aopOnStackNotExt (right_aop, i, 2) || right_aop->type == AOP_LIT || right_aop->type == AOP_IMMD || right_aop->type == AOP_DIR && i + 1 < right_aop->size))
        {
          /* NOTE(review): the two genMove_o calls in this branch pass TRUE, FALSE
             as their last two arguments, exactly like the x branch above, although
             this branch was selected on y being free. If those arguments are the
             x-dead / y-dead flags this looks like a copy-paste slip - confirm. */
          genMove_o (ASMOP_Y, 0, left_aop, i, 2, a_free, TRUE, FALSE);
          if (i == size - 2 && right_aop->type == AOP_LIT && byteOfVal (right_aop->aopu.aop_lit, i) <= 2 && !byteOfVal (right_aop->aopu.aop_lit, i + 1))
            for (j = 0; j < byteOfVal (right_aop->aopu.aop_lit, i); j++)
              emit3w (A_DECW, ASMOP_Y, 0);
          else
            {
              emit2 ("subw", "y, %s", aopGet2 (right_aop, i));
              cost (4 - aopOnStack (right_aop, i, 2), 2);
            }
          genMove_o (result_aop, i, ASMOP_Y, 0, 2, a_free, TRUE, FALSE);
          if (aopInReg (result_aop, i, A_IDX) || aopInReg (result_aop, i + 1, A_IDX))
            result_in_a = TRUE;
          started = TRUE;
          i += 2;
        }
      /* Byte in xh / yh minus a literal: subw with the literal shifted into the upper byte. */
      else if (!started && right_aop->type == AOP_LIT &&
        (aopInReg (left_aop, i, XH_IDX) && aopInReg (result_aop, i, XH_IDX) || aopInReg (left_aop, i, YH_IDX) && aopInReg (result_aop, i, YH_IDX)))
        {
          emit2 ("subw", "%s, #%d", aopInReg (left_aop, i, YH_IDX) ? "y" : "x", byteOfVal (right_aop->aopu.aop_lit, i) << 8);
          cost (3 + aopInReg (left_aop, i, YH_IDX), 2);
          started = TRUE;
          i++;
        }
      else if (!started && i == size - 1 && right_aop->type == AOP_LIT && // For yl, we only save a cycle compared to the normal way.
        (aopInReg (left_aop, i, XL_IDX) && aopInReg (result_aop, i, XL_IDX) && xh_free || aopInReg (left_aop, i, YL_IDX) && aopInReg (result_aop, i, YL_IDX) && yh_free))
        {
          emit2 ("subw", "%s, #%d", aopInReg (left_aop, i, YL_IDX) ? "y" : "x", byteOfVal (right_aop->aopu.aop_lit, i));
          cost (3 + aopInReg (left_aop, i, YL_IDX), 2);
          started = TRUE;
          i++;
        }
      /* Top byte in xl / yl minus a stack byte, upper half of the pair free. */
      else if (!started && i == size - 1 && aopOnStackNotExt (right_aop, i, 1) &&
        (aopInReg (left_aop, i, XL_IDX) && aopInReg (result_aop, i, XL_IDX) && xh_free || aopInReg (left_aop, i, YL_IDX) && aopInReg (result_aop, i, YL_IDX) && yh_free))
        {
          emit2 ("subw", "%s, (%d, sp)", aopInReg (left_aop, i, YL_IDX) ? "y" : "x", right_aop->aopu.bytes[i].byteu.stk + G.stack.pushed - 1);
          cost (3, 2);
          started = TRUE;
          i++;
        }
      else if (aopInReg (right_aop, i, A_IDX)) // Needs special handling as generic code below would overwrite a.
        {
          if (!pushed_a)
            push (ASMOP_A, 0, 1);
          cheapMove (ASMOP_A, 0, left_aop, i, false);
          emit2 (started ? "sbc" : "sub", "a, (1, sp)");
          cost (2, 1);
          if (aopInReg (result_aop, i, A_IDX))
            {
              adjustStack (1, false, false, false);
              pushed_a = false;
            }
          else
            {
              cheapMove (result_aop, i, ASMOP_A, 0, false);
              pushed_a = true;
            }
          i++;
        }
      /* Generic byte-wise path through a. */
      else
        {
          if (pushed_a && left_aop->regs[A_IDX] == i && regDead (A_IDX, ic))
            {
              pop (ASMOP_A, 0, 1);
              pushed_a = FALSE;
            }
          else if (!a_free)
            {
              push (ASMOP_A, 0, 1);
              pushed_a = TRUE;
              result_in_a = FALSE;
            }

          if (left_aop->regs[A_IDX] == i && pushed_a)
            {
              emit2 ("ld", "a, (1, sp)");
              cost (2, 1);
            }
          else
            cheapMove (ASMOP_A, 0, left_aop, i, FALSE);

          if (!started && aopIsLitVal (right_aop, i, 1, 0))
            ; // Skip over this byte.
          else if (!started && i + 1 == size && aopIsLitVal (right_aop, i, 1, 1))
            emit3 (A_DEC, ASMOP_A, 0);
          else
            {
              const asmop *right_stacked = NULL;
              int right_offset;

              right_stacked = stack_aop (right_aop, i, &right_offset);

              if (!right_stacked)
                emit3_o (started ? A_SBC : A_SUB, ASMOP_A, 0, right_aop, i);
              else
                {
                  emit2 (started ? "sbc" : "sub", "a, (#%d, sp)", right_offset);
                  cost (2, 1);
                }

              if (right_stacked)
                pop (right_stacked, 0, 2);

              started = TRUE;
            }

          cheapMove (result_aop, i, ASMOP_A, 0, FALSE);

          if (aopInReg (result_aop, i, A_IDX))
            result_in_a = TRUE;

          i++;
        }
    }

  /* Restore the saved a, or just drop it if a now holds part of the result. */
  if (pushed_a && !result_in_a)
    pop (ASMOP_A, 0, 1);
  else if (pushed_a)
    adjustStack (1, FALSE, FALSE, FALSE);
}

/*-----------------------------------------------------------------*/
/* genUminusFloat - generates code for unary minus on float        */
/*-----------------------------------------------------------------*/
/* Copies left to result and inverts bit 7 of the most significant byte,
   either with sllw / ccf / rrcw on the register pair holding that byte or
   with xor #0x80 through a. */
static void
genUminusFloat (const iCode *ic)
{
  operand *result = IC_RESULT (ic);
  operand *left = IC_LEFT (ic);
  bool move_all;        /* also move the top byte up front and modify it in the result */

  D (emit2 ("; genUminusFloat", ""));

  aopOp (IC_LEFT (ic), ic);
  aopOp (IC_RESULT (ic), ic);

  move_all = aopRS(left->aop) && left->aop->regs[A_IDX] >= 0 && aopRS(result->aop) && result->aop->regs[A_IDX] >= 0 && result->aop->regs[A_IDX] < result->aop->size - 1 ||
    (aopInReg (result->aop, result->aop->size - 2, X_IDX) || aopInReg (result->aop, result->aop->size - 2, Y_IDX)) && aopOnStack (left->aop, result->aop->size - 2, 2);

  genMove_o (result->aop, 0, left->aop, 0, result->aop->size - 1 + move_all, regDead (A_IDX, ic), regDead (X_IDX, ic), regDead (Y_IDX, ic));

  if (aopInReg (result->aop, result->aop->size - 1, YH_IDX) && (move_all || aopInReg (left->aop, result->aop->size - 1, YH_IDX)) ||
    aopInReg (result->aop, result->aop->size - 1, XH_IDX) && (move_all || aopInReg (left->aop, result->aop->size - 1, XH_IDX)))
    {
      /* Shift the top bit into the carry, complement the carry, rotate it back in. */
      const bool use_y = aopInReg (result->aop, result->aop->size - 1, YH_IDX);
      emit3w (A_SLLW, use_y ? ASMOP_Y : ASMOP_X, 0);
      emit2 ("ccf", "");
      cost (1, 1);
      emit3w (A_RRCW, use_y ? ASMOP_Y : ASMOP_X, 0);
    }
  // todo: Use bcpl. use swap_to_a for left in same reg as right.
  else
    {
      /* Save a if it is live or holds a lower result byte. */
      if (!regDead(A_IDX, ic) || aopRS(result->aop) && result->aop->regs[A_IDX] >= 0 && result->aop->regs[A_IDX] < result->aop->size - 1)
        push (ASMOP_A, 0, 1);

      cheapMove (ASMOP_A, 0, (move_all ? result: left)->aop, left->aop->size - 1, FALSE);
      emit2 ("xor", "a, #0x80");
      cost (2, 1);
      cheapMove (result->aop, result->aop->size - 1, ASMOP_A, 0, FALSE);

      if (!regDead(A_IDX, ic) || aopRS(result->aop) && result->aop->regs[A_IDX] >= 0 && result->aop->regs[A_IDX] < result->aop->size - 1)
        pop (ASMOP_A, 0, 1);
    }

  freeAsmop (left);
  freeAsmop (result);
}

/*-----------------------------------------------------------------*/
/* genUminus - generates code for unary minus                      */
/*-----------------------------------------------------------------*/
/* Float operands are handed to genUminusFloat; everything else is emitted
   as the subtraction 0 - left via genSub. */
static void
genUminus (const iCode *ic)
{
  operand *result;
  operand *left;

  if (IS_FLOAT (operandType (IC_LEFT (ic))))
    {
      genUminusFloat (ic);
      return;
    }

  result = IC_RESULT (ic);
  left = IC_LEFT (ic);

  D (emit2 ("; genUminus", ""));

  aopOp (IC_LEFT (ic), ic);
  aopOp (IC_RESULT (ic), ic);

  genSub (ic, result->aop, ASMOP_ZERO, left->aop);

  freeAsmop (left);
  freeAsmop (result);
}

/* Pushes a, x and y (in that order) when they are not dead at ic and marks
   the registers as saved in G.saved. Does nothing if they are already saved,
   except during a register allocation dry run. */
static void
saveRegsForCall (const iCode * ic)
{
  if (G.saved && !regalloc_dry_run)
    return;

  //if (!regDead (C_IDX, ic))
  //  push (ASMOP_C, 0, 1);

  if (!regDead (A_IDX, ic))
    push (ASMOP_A, 0, 1);

  if (!regDead (X_IDX, ic))
    push (ASMOP_X, 0, 2);

  if (!regDead (Y_IDX, ic))
    push (ASMOP_Y, 0, 2);

  G.saved = TRUE;
}

/*-----------------------------------------------------------------*/
/* genIpush - generate code for pushing this gets a little complex */
/*-----------------------------------------------------------------*/
/* Pushes IC_LEFT as a call parameter, from offset 0 upward. Only parameter
   pushes (ic->parmPush) are supported. */
static void
genIpush (const iCode * ic)
{
  int size, offset = 0;
  iCode *walk;

  D (emit2 ("; genIPush", ""));

  if (!ic->parmPush)
    {
      wassertl (0, "Encountered an unsupported spill push.");
      return;
    }

  /* Caller saves, and this is the first iPush. */
  /* Scan ahead until we find the function that we are pushing parameters to.
     Count the number of addSets on the way to figure out what registers
     are used in the send set.
   */
  /* The walk has no end-of-list check: it relies on a CALL / PCALL following. */
  for (walk = ic->next; walk->op != CALL && walk->op != PCALL; walk = walk->next);
  if (!G.saved && !regalloc_dry_run /* Cost is counted at CALL or PCALL instead */ )
    saveRegsForCall (walk);

  /* then do the push */
  aopOp (IC_LEFT (ic), ic);

  for (size = IC_LEFT (ic)->aop->size, offset = 0; size;)
    {
      if (aopInReg (IC_LEFT (ic)->aop, offset, X_IDX) || aopInReg (IC_LEFT (ic)->aop, offset, Y_IDX))
        {
          push (IC_LEFT (ic)->aop, offset, 2);
          offset += 2;
          size -= 2;
        }
      // Going through x is more efficient than two individual pushes for some cases.
      else if (size >= 2 && regDead (X_IDX, ic) && IC_LEFT (ic)->aop->regs[XL_IDX] < offset && IC_LEFT (ic)->aop->regs[XH_IDX] < offset &&
        (aopIsLitVal (IC_LEFT (ic)->aop, offset, 2, 0x0000) || IC_LEFT (ic)->aop->type == AOP_DIR && optimize.codeSize || aopOnStack (IC_LEFT (ic)->aop, offset, 2)))
        {
          genMove_o (ASMOP_X, 0, IC_LEFT (ic)->aop, offset, 2, regDead (A_IDX, ic) && IC_LEFT (ic)->aop->regs[A_IDX] < offset, TRUE, FALSE);
          push (ASMOP_X, 0, 2);
          offset += 2;
          size -= 2;
        }
      // Going through y is more efficient than two individual pushes for stack operands only.
      else if (size >= 2 && regDead (Y_IDX, ic) && IC_LEFT (ic)->aop->regs[YL_IDX] < offset && IC_LEFT (ic)->aop->regs[YH_IDX] < offset && aopOnStack (IC_LEFT (ic)->aop, offset, 2))
        {
          genMove_o (ASMOP_Y, 0, IC_LEFT (ic)->aop, offset, 2, regDead (A_IDX, ic) && IC_LEFT (ic)->aop->regs[A_IDX] < offset, FALSE, TRUE);
          push (ASMOP_Y, 0, 2);
          offset += 2;
          size -= 2;
        }
      // Push directly.
      else if (IC_LEFT (ic)->aop->type == AOP_LIT || aopInReg (IC_LEFT (ic)->aop, offset, A_IDX) || IC_LEFT (ic)->aop->type == AOP_DIR || IC_LEFT (ic)->aop->type == AOP_IMMD)
        {
          push (IC_LEFT (ic)->aop, offset, 1);
          offset++;
          size--;
        }
      // a is not free. Try to use xl instead.
      else if ((!regDead (A_IDX, ic) || IC_LEFT (ic)->aop->regs[A_IDX] > offset) && (regDead (XL_IDX, ic) && IC_LEFT (ic)->aop->regs[XL_IDX] <= offset || aopInReg (IC_LEFT (ic)->aop, offset, XL_IDX)))
        {
          /* Push the whole pair, then drop one byte again so that a single byte stays pushed. */
          genMove_o (ASMOP_X, 0, IC_LEFT (ic)->aop, offset, 1, FALSE, FALSE, FALSE);
          push (ASMOP_X, 0, 2);
          adjustStack (1, FALSE, FALSE, FALSE);
          offset++;
          size--;
        }
      // Neither a nor xl is free. Allocator guarantees that yl is free then; use it.
      else if (!regDead (A_IDX, ic) || IC_LEFT (ic)->aop->regs[A_IDX] > offset)
        {
          genMove_o (ASMOP_Y, 0, IC_LEFT (ic)->aop, offset, 1, FALSE, FALSE, FALSE);
          push (ASMOP_Y, 0, 2);
          adjustStack (1, FALSE, FALSE, FALSE);
          offset++;
          size--;
        }
      else
        {
          cheapMove (ASMOP_A, 0, IC_LEFT (ic)->aop, offset, FALSE);
          push (ASMOP_A, 0, 1);
          offset++;
          size--;
        }
    }

  freeAsmop (IC_LEFT (ic));
}

/*-----------------------------------------------------------------*/
/* genCall - generates a call statement                            */
/*-----------------------------------------------------------------*/
/* Handles both CALL and PCALL: saves live registers, pushes the hidden
   result address for large return types, emits the call, removes the
   parameters from the stack, assigns the returned value and finally
   restores the saved registers. */
static void
genCall (const iCode *ic)
{
  bool SomethingReturned, bigreturn, half;
  sym_link *dtype = operandType (IC_LEFT (ic));
  sym_link *etype = getSpec (dtype);
  sym_link *ftype = IS_FUNCPTR (dtype) ? dtype->next : dtype;

  D (emit2 ("; genCall", ""));

  saveRegsForCall (ic);

  /* Return value of big type or returning struct or union. */
  /* NOTE(review): genFunction computes its bigreturn without the IS_STRUCT
     test used here - confirm the two agree for structs of at most 4 bytes. */
  bigreturn = (getSize (ftype->next) > 4) || IS_STRUCT (ftype->next);
  if (bigreturn)
    {
      wassertl (IC_RESULT (ic), "Unused return value in call to function returning large type.");

      aopOp (IC_RESULT (ic), ic);

      if (IC_RESULT (ic)->aop->type != AOP_STK)
        {
          if (!regalloc_dry_run)
            wassertl (0, "Unimplemented return value size / type combination.");
          cost (180, 180);
        }

      /* Compute an address inside the result's stack slot in x and push it. */
      emit2 ("ldw", "x, sp");
      emit2 ("addw", "x, #%d", IC_RESULT (ic)->aop->aopu.bytes[getSize (ftype->next) - 1].byteu.stk + G.stack.pushed);
      cost (2 + 4, 1 + 2);
      push (ASMOP_X, 0, 2);

      freeAsmop (IC_RESULT (ic));
    }

  if (ic->op == PCALL)
    {
      operand *left = IC_LEFT (ic);

      aopOp (left, ic);

      if (options.model == MODEL_LARGE && left->aop->type == AOP_DIR)
        {
          wassertl (left->aop->size == 3, "Functions pointers should be 24 bits in large memory model.");

          emit2 ("callf", "[%s]", left->aop->aopu.aop_dir);
          cost (4, 8);
        }
      else if (options.model == MODEL_LARGE)
        {
          wassertl (left->aop->size == 3, "Functions pointers should be 24 bits in large memory model.");

          symbol *tlbl = (regalloc_dry_run ? 0 : newiTempLabel (NULL));

          /* Push the 3 bytes of the label emitted after the retf, then the
             3 bytes of the target address, and jump there with retf. */
          if (!regalloc_dry_run)
            {
              emit2("push", "#(!tlabel)", labelKey2num (tlbl->key));
              emit2("push", "#(!tlabel >> 8)", labelKey2num (tlbl->key));
              emit2("push", "#(!tlabel >> 16)", labelKey2num (tlbl->key));
              cost (6, 3);
              G.stack.pushed += 3;
            }

          if (aopInReg (left->aop, 0, X_IDX) || aopInReg (left->aop, 0, Y_IDX))
            push (left->aop, 0, 2);
          else if (aopOnStackNotExt (left->aop, 0, 2) && !(aopInReg (left->aop, 2, XL_IDX) || aopInReg (left->aop, 2, XH_IDX)) ||
            aopInReg (left->aop, 2, A_IDX))
            {
              genMove (ASMOP_X, left->aop, !aopInReg (left->aop, 2, A_IDX), true, false);
              push (ASMOP_X, 0, 2);
            }
          else
            {
              cheapMove (ASMOP_A, 0, left->aop, 0, false);
              push (ASMOP_A, 0, 1);
              cheapMove (ASMOP_A, 0, left->aop, 1, false);
              push (ASMOP_A, 0, 1);
            }
          cheapMove (ASMOP_A, 0, left->aop, 2, false);
          push (ASMOP_A, 0, 1);
          emit2("retf", "");
          cost (1, 5);

          /* NOTE(review): 6 bytes are subtracted unconditionally, while the 3 label
             bytes above are only added outside a dry run - confirm this is intended. */
          G.stack.pushed -= 6;

          emitLabel (tlbl);
        }
      else
        {
          wassertl (left->aop->size == 2, "Functions pointers should be 16 bits in medium memory model.");

          if (left->aop->type == AOP_LIT || left->aop->type == AOP_IMMD)
            {
              emit2 ("call", "%s", aopGet2 (left->aop, 0));
              cost (3, 4);
            }
          else if (aopInReg (left->aop, 0, Y_IDX)) // Faster than going through x.
            {
              emit2 ("call", "(y)");
              cost (2, 4);
            }
          else
            {
              genMove (ASMOP_X, left->aop, TRUE, TRUE, TRUE);

              emit2 ("call", "(x)");
              cost (1, 4);
            }
        }
      freeAsmop (left);
    }
  else
    {
      if (options.model == MODEL_LARGE)
        {
          if (IS_LITERAL (etype))
            {
              emit2 ("callf", "0x%06X", ulFromVal (OP_VALUE (IC_LEFT (ic))));
              cost (4, 5);
            }
          else
            {
              /* A function marked noreturn that takes no stack parameters is jumped to instead of called. */
              bool jump = (!ic->parmBytes && IFFUNC_ISNORETURN (OP_SYMBOL (IC_LEFT (ic))->type));
              emit2 (jump ? "jpf" : "callf", "%s",
                (OP_SYMBOL (IC_LEFT (ic))->rname[0] ? OP_SYMBOL (IC_LEFT (ic))->rname : OP_SYMBOL (IC_LEFT (ic))->name));
              cost (4, jump ? 2 : 5);
            }
        }
      else
        {
          if (IS_LITERAL (etype))
            {
              emit2 ("call", "0x%04X", ulFromVal (OP_VALUE (IC_LEFT (ic))));
              cost (3, 4);
            }
          else
            {
              bool jump = (!ic->parmBytes && IFFUNC_ISNORETURN (OP_SYMBOL (IC_LEFT (ic))->type));
              emit2 (jump ? "jp" : "call", "%s",
                (OP_SYMBOL (IC_LEFT (ic))->rname[0] ? OP_SYMBOL (IC_LEFT (ic))->rname : OP_SYMBOL (IC_LEFT (ic))->name));
              cost (3, jump ? 1 : 4);
            }
        }
    }

  SomethingReturned = (IS_ITEMP (IC_RESULT (ic)) &&
                       (OP_SYMBOL (IC_RESULT (ic))->nRegs || OP_SYMBOL (IC_RESULT (ic))->spildir))
                       || IS_TRUE_SYMOP (IC_RESULT (ic));

  /* Remove parameters (and the hidden result address) from the stack without
     clobbering the registers that carry the return value. */
  if (ic->parmBytes || bigreturn)
    adjustStack (ic->parmBytes + bigreturn * 2, !(SomethingReturned && getSize (ftype->next) == 1), !(SomethingReturned && (getSize (ftype->next) == 2 || getSize (ftype->next) == 4)), !(SomethingReturned && getSize (ftype->next) == 4));

  half = stm8_extend_stack && SomethingReturned && getSize (ftype->next) == 4;

  /* Todo: More efficient handling of long return value for function with extended stack when the result value does not use the extended stack. */

  /* Special handling of assignment of long result value when using extended stack. */
  if (half)
    {
      asmop *result;
      int save_a = 0;

      aopOp (IC_RESULT (ic), ic);
      result = IC_RESULT (ic)->aop;

      /* Park the returned y on the stack and reload y from the word above it. */
      push (ASMOP_Y, 0, 2);
      emit2 ("ldw", "y, (3, sp)");
      cost (2, 2);

      emit2 ("ld", "a, (2, sp)");
      cost (2, 1);
      if (IC_RESULT (ic)->aop->size > 2)
        cheapMove (IC_RESULT (ic)->aop, 2, ASMOP_A, 0, TRUE);
      /* Result byte 2 lives in a: keep it on the stack while a is reused for byte 3. */
      if (result->size > 2)
        if (aopRS (result) && aopRS (ASMOP_A) &&
            result->aopu.bytes[2].in_reg && ASMOP_A->aopu.bytes[0].in_reg &&
            result->aopu.bytes[2].byteu.reg == ASMOP_A->aopu.bytes[0].byteu.reg)
          {
            push (ASMOP_A, 0, 1);
            save_a = 1;
          }

      if (save_a)
        emit2 ("ld", "a, (2, sp)");
      else
        emit2 ("ld", "a, (1, sp)");
      cost (2, 1);
      if (IC_RESULT (ic)->aop->size > 3)
        cheapMove (IC_RESULT (ic)->aop, 3, ASMOP_A, 0, TRUE);
      if (save_a)
        {
          pop (ASMOP_A, 0, 1);
          save_a = 0;
        }

      adjustStack (4, FALSE, FALSE, FALSE);

      if (IC_RESULT (ic)->aop->regs[XL_IDX] >= 2 || IC_RESULT (ic)->aop->regs[XH_IDX] >= 2)
        {
          wassert (regalloc_dry_run);
          cost (180, 180);
        }

      freeAsmop (IC_RESULT (ic));
    }
  else if (stm8_extend_stack)
    pop (ASMOP_Y, 0, 2);

  /* if we need assign a result value */
  if (SomethingReturned && !bigreturn)
    {
      int size;

      aopOp (IC_RESULT (ic), ic);

      /* In the 'half' case the upper two bytes have already been assigned above. */
      size = !half ? IC_RESULT (ic)->aop->size : (IC_RESULT (ic)->aop->size > 2 ? 2 : IC_RESULT (ic)->aop->size);

      wassert (getSize (ftype->next) >= 1 && getSize (ftype->next) <= 4);

      genMove_o (IC_RESULT (ic)->aop, 0, getSize (ftype->next) == 1 ? ASMOP_A : ASMOP_XY, 0, size, TRUE, TRUE, !stm8_extend_stack);

      freeAsmop (IC_RESULT (ic));
    }

  // Restore regs.
  /* Registers come back in the reverse order of saveRegsForCall. When one half
     of a pair is dead only the other half is restored, through a. */
  if (!regDead (Y_IDX, ic) && !stm8_extend_stack)
    if (regDead (YH_IDX, ic))
      {
        adjustStack (1, FALSE, FALSE, FALSE);
        swap_to_a (YL_IDX);
        pop (ASMOP_A, 0, 1);
        swap_from_a(YL_IDX);
      }
    else if (regDead (YL_IDX, ic))
      {
        swap_to_a (YH_IDX);
        pop (ASMOP_A, 0, 1);
        swap_from_a(YH_IDX);
        adjustStack (1, FALSE, FALSE, FALSE);
      }
    else
      pop (ASMOP_Y, 0, 2);

  if (!regDead (X_IDX, ic))
    {
      if (regDead (XH_IDX, ic))
        {
          adjustStack (1, FALSE, FALSE, FALSE);
          swap_to_a (XL_IDX);
          pop (ASMOP_A, 0, 1);
          swap_from_a(XL_IDX);
        }
      else if (regDead (XL_IDX, ic))
        {
          swap_to_a (XH_IDX);
          pop (ASMOP_A, 0, 1);
          swap_from_a(XH_IDX);
          adjustStack (1, FALSE, FALSE, FALSE);
        }
      else
        pop (ASMOP_X, 0, 2);
    }

  if (!regDead (A_IDX, ic))
    pop (ASMOP_A, 0, 1);

  //if (!regDead (C_IDX, ic))
  //  pop (ASMOP_C, 0, 1);

  G.saved = FALSE;
}

/*---------------------------------------------------------------------*/
/* genCritical - mask interrupts until important block completes       */
/*---------------------------------------------------------------------*/

/* Emits a single "sim" instruction; ic is not used. */
static void
genCritical (iCode * ic)
{
  emit2("sim", "");
  cost (1, 1);
}

/* Emits a single "rim" instruction; ic is not used. */
static void
genEndCritical (iCode * ic)
{
  emit2("rim", "");
  cost (1, 1);
}

/*-----------------------------------------------------------------*/
/* genFunction - generated code for function entry                 */
/*-----------------------------------------------------------------*/
/* Resets the stack bookkeeping in G.stack, emits the function header and
   label, and - unless the function is naked - the prologue: optional
   interrupt masking, the ISR div workaround, the y setup for extended
   stack access and the allocation of sym->stack bytes of locals. */
static void
genFunction (iCode *ic)
{
  const symbol *sym = OP_SYMBOL_CONST (IC_LEFT (ic));
  sym_link *ftype = operandType (IC_LEFT (ic));
  bool bigreturn;

  G.stack.pushed = 0;
  G.stack.param_offset = 0;

  /* create the function header */
  emit2 (";", "-----------------------------------------");
  emit2 (";", " function %s", sym->name);
  emit2 (";", "-----------------------------------------");

  D (emit2 (";", stm8_assignment_optimal ? "Register assignment is optimal." : "Register assignment might be sub-optimal."));
  D (emit2 (";", "Stack space usage: %d bytes.", sym->stack));

  emit2 ("", "%s:", sym->rname);
  genLine.lineCurr->isLabel = 1;

  if (IFFUNC_ISNAKED(ftype))
    {
      emit2(";", "naked function: no prologue.");
      return;
    }

  if (IFFUNC_ISCRITICAL (ftype))
    genCritical (NULL);

  // Workaround for hardware bug: Undocumented bit 6 of the condition code register needs to be cleared before div/divw. It is set during div/divw execution, and then reset. Without the workaround, the div and divw inside interrupt routines will give wrong results when the interrupt itself occurred while another div or divw was executed.
  // For more information see sections titled "Unexpected DIV/DIVW instruction result in ISR" in various STM8 errata notes (apparently all STM8 are affected).
  // The workaround here is the one recommended by STM in the erratum. There might be better ways to do it.
  if (IFFUNC_ISISR (sym->type) && !sym->funcDivFlagSafe)
    {
      D (emit2 (";", "Reset bit 6 of reg CC. Hardware bug workaround."));
#if 0
      // The workaround recommended by STM. 6 bytes, 7 cycles (5 nominally, two more due to pipeline stalls)
      emit2 ("push", "cc");
      emit2 ("pop", "a");
      emit2 ("and", "a, #0xbf");
      emit2 ("push", "a");
      emit2 ("pop", "cc");
      cost (6, 5);
#else
      // The workaround obtained by further investigation of RFE #449. Experiments on STM8S208MB and STM8L152C6 show that div resets bit 6 of cc.
      if (!optimize.codeSize)
        emit3 (A_CLR, ASMOP_A, 0); // Zero accumulator to reduce cycle cost in following division.
      emit2 ("div", "x, a"); // According to measurements on the STM8S208MB and STM8L152C6, div takes 2-3 cycles for divisions by zero and 2-17 cycles in general.
      cost (1, 3);
#endif
    }

  if (stm8_extend_stack) // Setup for extended stack access.
    {
      G.stack.size = stm8_call_stack_size + (sym->stack ? sym->stack : 0);
      D (emit2 (";", "Setup y for extended stack access."));
      emit2 ("ldw", "y, sp");
      /* NOTE(review): %ld expects a long argument; the type of G.stack.size is
         not visible here - confirm it matches. */
      emit2 ("subw", "y, #%ld", G.stack.size - 256);
      cost (6, 3);
    }

  /* A large return value is passed back through a hidden 2-byte pointer
     argument, which shifts the parameter offsets.
     NOTE(review): genCall additionally treats IS_STRUCT return types as
     bigreturn - confirm whether the same test is needed here. */
  bigreturn = (getSize (ftype->next) > 4);
  G.stack.param_offset += bigreturn * 2;

  if (options.debug && !regalloc_dry_run)
    debugFile->writeFrameAddress (NULL, &stm8_regs[SP_IDX], 1);

  /* adjust the stack for the function */
  if (sym->stack)
    adjustStack (-sym->stack, TRUE, TRUE, !stm8_extend_stack);
}

/*-----------------------------------------------------------------*/
/* genEndFunction - generates epilogue for functions               */
/*-----------------------------------------------------------------*/
/* Releases the locals, checks that the stack bookkeeping is balanced and
   emits the matching return instruction: iret for interrupt routines,
   retf in the large memory model, ret otherwise. Naked functions get no
   epilogue. */
static void
genEndFunction (iCode *ic)
{
  symbol *sym = OP_SYMBOL (IC_LEFT (ic));
  int retsize = getSize (sym->type->next);

  D (emit2 ("; genEndFunction", ""));

  wassert (!regalloc_dry_run);

  if (IFFUNC_ISNAKED(sym->type))
  {
      D (emit2 (";", "naked function: no epilogue."));
      if (options.debug && currFunc && !regalloc_dry_run)
        debugFile->writeEndFunction (currFunc, ic, 0);
      return;
  }

  /* adjust the stack for the function */
  /* The flags depend on retsize so that the registers holding the return value are left alone. */
  if (sym->stack)
    adjustStack (sym->stack, retsize != 1, retsize != 2 && retsize != 4, retsize != 4);

  wassertl (!G.stack.pushed, "Unbalanced stack.");

  if (IFFUNC_ISCRITICAL (sym->type))
    genEndCritical (NULL);

  if (IFFUNC_ISISR (sym->type))
    {
      /* if debug then send end of function */
      if (options.debug && currFunc && !regalloc_dry_run)
        debugFile->writeEndFunction (currFunc, ic, 1);

      emit2 ("iret", "");
      cost (1, 11);
    }
  else
    {
      /* if debug then send end of function */
      if (options.debug && currFunc && !regalloc_dry_run)
        debugFile->writeEndFunction (currFunc, ic, 1);

      if (options.model == MODEL_LARGE)
        {
          emit2 ("retf", "");
          cost (1, 5);
        }
      else
        {
          emit2 ("ret", "");
          cost (1, 4);
        }
    }
}

/*-----------------------------------------------------------------*/
/* genReturn - generate code for
return statement */ +/*-----------------------------------------------------------------*/ +static void +genReturn (const iCode *ic) +{ + operand *left = IC_LEFT (ic); + int size, i; + bool stacked = FALSE; + + D (emit2 ("; genReturn", "")); + + /* if we have no return value then + just generate the "ret" */ + if (!IC_LEFT (ic)) + goto jumpret; + + /* we have something to return then + move the return value into place */ + aopOp (left, ic); + size = left->aop->size; + + switch (size) + { + case 0: + break; + case 1: + cheapMove (ASMOP_A, 0, left->aop, 0, FALSE); + break; + case 2: + genMove (ASMOP_X, left->aop, TRUE, TRUE, TRUE); + break; + case 3: + wassertl (regalloc_dry_run || !stm8_extend_stack, "Unimplemented 24-bit return in function with extended stack access."); + genMove (ASMOP_XYL, left->aop, TRUE, TRUE, TRUE); + break; + case 4: + wassertl (regalloc_dry_run || !stm8_extend_stack, "Unimplemented long return in function with extended stack access."); + genMove (ASMOP_XY, left->aop, TRUE, TRUE, TRUE); + break; + default: + wassertl (size > 4, "Return not implemented for return value of this size."); + + for(i = 0; i < size; i++) + if (aopInReg (left->aop, i, XL_IDX) || aopInReg (left->aop, i, XH_IDX)) + { + push (ASMOP_X, 0, 2); + stacked = TRUE; + break; + } + + unsigned int o = G.stack.pushed + 3 + (options.model == MODEL_LARGE); + + if (o <= 255) + { + emit2 ("ldw", "x, (0x%02x, sp)", o); + cost (2, 2); + } + else + { + emit2 ("ldw", "x, sp"); + cost (1, 1); + emit2 ("addw", "x, #0x%04x", o); + cost (3, 2); + emit2 ("ldw", "x, (x)"); + cost (1, 1); + } + + // Clear a first. 
+ for(i = 0; i < size; i++) + if (aopInReg (left->aop, i, A_IDX)) + { + emit2 ("ld", "(#%d, x), a", size - 1 - i); + cost (2, 1); + break; + } + + for(i = 0; i < size;) + { + if (aopInReg (left->aop, i, Y_IDX) || size > 2 && left->aop->regs[YL_IDX] < i && left->aop->regs[YH_IDX] < i && (aopOnStackNotExt (left->aop, i, 2) || left->aop->type == AOP_LIT)) + { + genMove_o (ASMOP_Y, 0, left->aop, i, 2, TRUE, FALSE, TRUE); + if (size - 2 - i) + { + emit2 ("ldw", "(#%d, x), y", size - 2 - i); + cost (2, 2); + } + else + { + emit2 ("ldw", "(x), y"); + cost (1, 2); + } + i += 2; + } + else if (aopInReg (left->aop, i, XL_IDX) || aopInReg (left->aop, i, XH_IDX)) + { + emit2 ("ld", "a, (#%d, sp)", (int)(aopInReg (left->aop, i, XL_IDX)) + 1); + emit2 ("ld", "(#%d, x), a", size - 1 - i); + cost (4, 2); + i++; + } + else if (!aopInReg (left->aop, i, A_IDX)) + { + cheapMove (ASMOP_A, 0, left->aop, i, FALSE); + if (size - 1 - i) + { + emit2 ("ld", "(#%d, x), a", size - 1 - i); + cost (2, 1); + } + else + { + emit2 ("ld", "(x), a"); + cost (1, 1); + } + i++; + } + else + i++; + } + + if (stacked) + adjustStack (2, TRUE, TRUE, TRUE); + } + + freeAsmop (left); + +jumpret: + /* generate a jump to the return label + if the next is not the return statement */ + if (!(ic->next && ic->next->op == LABEL && IC_LABEL (ic->next) == returnLabel)) + emitJP(returnLabel, 1.0f); +} + +/*-----------------------------------------------------------------*/ +/* genLabel - generates a label */ +/*-----------------------------------------------------------------*/ +static void +genLabel (const iCode *ic) +{ + D (emit2 ("; genLabel", "")); + + /* special case never generate */ + if (IC_LABEL (ic) == entryLabel) + return; + + if (options.debug && !regalloc_dry_run) + debugFile->writeLabel (IC_LABEL (ic), ic); + + emitLabel (IC_LABEL (ic)); +} + +/*-----------------------------------------------------------------*/ +/* genGoto - generates a jump */ 
+/*-----------------------------------------------------------------*/ +static void +genGoto (const iCode *ic) +{ + D (emit2 ("; genGoto", "")); + + emitJP(IC_LABEL (ic), 1.0f); +} + +/*-----------------------------------------------------------------*/ +/* genPlus - generates code for addition */ +/*-----------------------------------------------------------------*/ +static void +genPlus (const iCode *ic) +{ + operand *result = IC_RESULT (ic); + operand *left = IC_LEFT (ic); + operand *right = IC_RIGHT (ic); + asmop *leftop; + asmop *rightop; + + struct asmop lop_impl; + struct asmop rop_impl; + + int size, i, j; + bool started; + bool pushed_a = FALSE; + bool result_in_a = FALSE; + symbol *endlbl = 0; + + D (emit2 ("; genPlus", "")); + + aopOp (IC_LEFT (ic), ic); + aopOp (IC_RIGHT (ic), ic); + aopOp (IC_RESULT (ic), ic); + + size = result->aop->size; + + /* Swap if left is literal or right is in A. */ + if (left->aop->type == AOP_LIT || right->aop->type != AOP_LIT && left->aop->type == AOP_IMMD || aopInReg (right->aop, 0, A_IDX) || aopInReg (right->aop, 0, X_IDX) || right->aop->type != AOP_LIT && right->aop->size == 1 && aopOnStackNotExt (left->aop, 0, 2) || left->aop->type == AOP_STK && (right->aop->type == AOP_REG || right->aop->type == AOP_REGSTK)) // todo: Swap in more cases when right in reg, left not. Swap individually per-byte. 
+ { + operand *t = right; + right = left; + left = t; + } + + if (left->aop->type == AOP_REGSTK && right->aop->type == AOP_REGSTK) + { + bool all_in_reg, all_on_stack; + + lop_impl.size = 0; + rop_impl.size = 0; + + lop_impl.regs[A_IDX] = -1; + lop_impl.regs[XL_IDX] = -1; + lop_impl.regs[XH_IDX] = -1; + lop_impl.regs[YL_IDX] = -1; + lop_impl.regs[YH_IDX] = -1; + rop_impl.regs[A_IDX] = -1; + rop_impl.regs[XL_IDX] = -1; + rop_impl.regs[XH_IDX] = -1; + rop_impl.regs[YL_IDX] = -1; + rop_impl.regs[YH_IDX] = -1; + + for (i = 0; i < size; i++) + { + asmop *lop = 0; + asmop *rop = 0; + + if (left->aop->size < i && right->aop->size < i) + continue; + else if (left->aop->size < i && aopOnStack (right->aop, i, 1)) + rop = right->aop; + else if (left->aop->size < i) + lop = right->aop; + else if (right->aop->size < i && aopOnStack (left->aop, i, 1)) + rop = left->aop; + else if (right->aop->size < i) + lop = left->aop; + else if (!left->aop->aopu.bytes[i].in_reg && right->aop->aopu.bytes[i].in_reg) + { + lop = right->aop; + rop = left->aop; + } + else + { + lop = left->aop; + rop = right->aop; + } + + if (lop) + { + lop_impl.aopu.bytes[i] = lop->aopu.bytes[i]; + if (lop->aopu.bytes[i].in_reg) + lop_impl.regs[lop->aopu.bytes[i].byteu.reg->rIdx] = i; + lop_impl.size++; + } + if (rop) + { + rop_impl.aopu.bytes[i] = rop->aopu.bytes[i]; + if (rop->aopu.bytes[i].in_reg) + rop_impl.regs[rop->aopu.bytes[i].byteu.reg->rIdx] = i; + rop_impl.size++; + } + } + + all_in_reg = all_on_stack = TRUE; + for (i = 0; i < lop_impl.size; i++) + if (lop_impl.aopu.bytes[i].in_reg) + all_on_stack = FALSE; + else + all_in_reg = FALSE; + lop_impl.type = all_on_stack ? AOP_STK : (all_in_reg ? AOP_REG : AOP_REGSTK); + all_in_reg = all_on_stack = TRUE; + for (i = 0; i < rop_impl.size; i++) + if (rop_impl.aopu.bytes[i].in_reg) + all_on_stack = FALSE; + else + all_in_reg = FALSE; + rop_impl.type = all_on_stack ? AOP_STK : (all_in_reg ? 
AOP_REG : AOP_REGSTK); + + leftop = &lop_impl; + rightop = &rop_impl; + } + else + { + leftop = left->aop; + rightop = right->aop; + } + + for (i = 0, started = FALSE; i < size;) // Todo: 16-bit operation in dead source might be cheaper than add. + { + bool a_free = regDead (A_IDX, ic) && leftop->regs[A_IDX] <= i && rightop->regs[A_IDX] <= i && !result_in_a || pushed_a; + bool xl_free = regDead (XL_IDX, ic) && (result->aop->regs[XL_IDX] >= i || result->aop->regs[XL_IDX] < 0) && leftop->regs[XL_IDX] <= i + 1 && rightop->regs[XL_IDX] < i; + bool xh_free = regDead (XH_IDX, ic) && (result->aop->regs[XH_IDX] >= i || result->aop->regs[XH_IDX] < 0) && leftop->regs[XH_IDX] <= i + 1 && rightop->regs[XH_IDX] < i; + bool x_free = xl_free && xh_free; + bool yl_free = regDead (YL_IDX, ic) && (result->aop->regs[YL_IDX] >= i || result->aop->regs[YL_IDX] < 0) && leftop->regs[YL_IDX] <= i + 1 && rightop->regs[YL_IDX] < i; + bool yh_free = regDead (YH_IDX, ic) && (result->aop->regs[YH_IDX] >= i || result->aop->regs[YH_IDX] < 0) && leftop->regs[YH_IDX] <= i + 1 && rightop->regs[YH_IDX] < i; + bool y_free = yl_free && yh_free; + + // Special case for rematerializing sums + if (!started && i == size - 2 && (leftop->type == AOP_IMMD && rightop->type == AOP_LIT) && + (aopInReg (result->aop, i, X_IDX) || aopInReg (result->aop, i, Y_IDX) || x_free && aopOnStack (result->aop, i, 2))) + { + unsigned offset = byteOfVal (right->aop->aopu.aop_lit, 1) * 256 + byteOfVal (right->aop->aopu.aop_lit, 0); + bool y = aopInReg (result->aop, i, Y_IDX) ; + emit2 ("ldw", y ? "y, %s+%d" : "x, %s+%d", aopGet2 (leftop, i), offset); + cost (3 + y, 2); + genMove_o (result->aop, i, y ? ASMOP_Y : ASMOP_X, 0, 2, a_free, TRUE, y_free); + started = TRUE; + i += 2; + } + // We can use incw / decw easily only for the only, top non-zero word, since it neither takes into account an existing carry nor does it update the carry. 
+ else if (!started && i == size - 2 && + (aopInReg (result->aop, i, X_IDX) || aopInReg (result->aop, i, Y_IDX)) && + rightop->type == AOP_LIT && !byteOfVal (rightop->aopu.aop_lit, i + 1) && + byteOfVal (rightop->aopu.aop_lit, i) <= 1 + aopInReg (result->aop, i, X_IDX) || + !started && i == size - 1 && + !(aopInReg (leftop, i, A_IDX) && regDead (A_IDX, ic)) && + (aopInReg (result->aop, i, XL_IDX) && regDead (XH_IDX, ic) && leftop->regs[XH_IDX] < 0 && result->aop->regs[XH_IDX] < 0 || aopInReg (result->aop, i, YL_IDX) && regDead (YH_IDX, ic) && leftop->regs[YH_IDX] < 0 && result->aop->regs[YH_IDX] < 0) && + rightop->type == AOP_LIT && byteOfVal (rightop->aopu.aop_lit, i) <= 1 + aopInReg (result->aop, i, XL_IDX)) + { + bool half = (i == size - 1); + bool x = aopInReg (result->aop, i, half ? XL_IDX : X_IDX) ; + genMove_o (x ? ASMOP_X : ASMOP_Y, 0, leftop, i, 2 - half, a_free, x, !x); + for (j = 0; j < byteOfVal (rightop->aopu.aop_lit, i); j++) + emit3w (A_INCW, x ? ASMOP_X : ASMOP_Y, 0); + cost (x ? 1 : 2, 1); + started = TRUE; + i += 2; + } + else if (!started && i == size - 2 && + (aopInReg (result->aop, i, X_IDX) || aopInReg (result->aop, i, Y_IDX)) && + aopIsLitVal (rightop, i, 2, 0xffff)) + { + bool x = aopInReg (result->aop, i, X_IDX); + genMove_o (x ? ASMOP_X : ASMOP_Y, 0, leftop, i, 2, a_free, x, !x); + emit3w (A_DECW, x ? ASMOP_X : ASMOP_Y, 0); + started = TRUE; + i += 2; + } + // Using incw with a chain or conditional jumps to emulate carry - allows somewhat more efficient 32-bit increment. 
+ else if(!started && !pushed_a && rightop->type == AOP_LIT && regDead (X_IDX, ic) && !((size - i) % 2) && + aopIsLitVal (rightop, i, 2, 0x0001) && aopIsLitVal (rightop, i + 2, size - i, 0) && + ((aopOnStack (leftop, i, size - i) && aopOnStack (result->aop, i, size - i) || + aopOnStack (leftop, i, size - i - 2) && aopOnStack (leftop, i, size - i - 2) && aopInReg (result->aop, size - 2, Y_IDX) && aopInReg (result->aop, size - 2, Y_IDX)) && + result->aop->aopu.bytes[i].byteu.stk == leftop->aopu.bytes[i].byteu.stk || + aopOnStack (leftop, i + 2, size - i - 2) && aopOnStack (leftop, i + 2, size - i - 2) && aopInReg (result->aop, i, Y_IDX) && aopInReg (result->aop, i, Y_IDX) && result->aop->aopu.bytes[i + 2].byteu.stk == leftop->aopu.bytes[i + 2].byteu.stk || + size - i == 4 && + (aopInReg (leftop, i, Y_IDX) && aopInReg (result->aop, i, Y_IDX) && aopInReg (leftop, i + 2, X_IDX) && aopInReg (result->aop, i + 2, X_IDX) || aopInReg (leftop, i, X_IDX) && aopInReg (result->aop, i, X_IDX) && aopInReg (leftop, i + 2, Y_IDX) && aopInReg (result->aop, i + 2, Y_IDX)))) + { + if(!endlbl && !regalloc_dry_run) + endlbl = newiTempLabel (NULL); + for(;;) + { + if(aopInReg (leftop, i, Y_IDX) && aopInReg (result->aop, i, Y_IDX) || + aopInReg (leftop, i, X_IDX) && aopInReg (result->aop, i, X_IDX)) + emit3w_o (A_INCW, result->aop, i, 0, 0); + else + { + genMove_o (ASMOP_X, 0, leftop, i, 2, a_free, TRUE, y_free); + emit3w (A_INCW, ASMOP_X, 0); + genMove_o (result->aop, i, ASMOP_X, 0, 2, a_free, TRUE, y_free); + } + i += 2; + if(i >= size) + break; + if (endlbl) + emit2 ("jrne", "!tlabel", labelKey2num (endlbl->key)); + } + } + else if (!started && + (aopInReg (result->aop, i, X_IDX) || aopInReg (result->aop, i, Y_IDX)) && + (rightop->type == AOP_LIT || rightop->type == AOP_IMMD || aopOnStackNotExt (rightop, i, 2) || rightop->type == AOP_DIR && i + 1 < rightop->size) && + !(aopInReg (result->aop, i, Y_IDX) && aopInReg (left->aop, i, X_IDX) && regDead (X_IDX, ic))) + { + bool x = aopInReg 
(result->aop, i, X_IDX); + genMove_o (x ? ASMOP_X : ASMOP_Y, 0, leftop, i, 2, a_free, x, !x); + if (!aopIsLitVal (rightop, i, 2, 0x0000)) + { + emit2 ("addw", x ? "x, %s" : "y, %s", aopGet2 (rightop, i)); + cost ((x || aopOnStack (rightop, 0, 2)) ? 3 : 4, 2); + started = TRUE; + } + i += 2; + } + else if (!started && i == size - 1 && + (aopOnStack (leftop, i, 1) || leftop->type == AOP_DIR) && aopSame (result->aop, i, leftop, i, 1) && + rightop->type == AOP_LIT && byteOfVal (rightop->aopu.aop_lit, i) <= 2 + !a_free) + { + for (j = 0; j < byteOfVal (rightop->aopu.aop_lit, i); j++) + emit3_o (A_INC, result->aop, i, 0, 0); + i++; + } + else if (!started && i == size - 1 && + (aopOnStack (leftop, i, 1) || leftop->type == AOP_DIR) && aopSame (result->aop, i, leftop, i, 1) && + rightop->type == AOP_LIT && byteOfVal (rightop->aopu.aop_lit, i) >= 254 - !a_free) + { + for (j = byteOfVal (rightop->aopu.aop_lit, i); j < 256; j++) + emit3_o (A_DEC, result->aop, i, 0, 0); + i++; + } + else if (!started && i + 1 < size && (x_free || aopInReg(leftop, i, X_IDX) && regDead (X_IDX, ic)) && + (aopOnStackNotExt (rightop, i, 2) || rightop->type == AOP_LIT || rightop->type == AOP_IMMD || rightop->type == AOP_DIR && i + 1 < rightop->size) && + ((aopOnStack (result->aop, i, 2) || result->aop->type == AOP_DIR) && (aopRS (leftop) && !aopInReg(leftop, i, A_IDX) && !aopInReg(leftop, i + 1, A_IDX) || leftop->type == AOP_DIR) || + aopInReg(leftop, i, X_IDX) && aopInReg(result->aop, i, Y_IDX) || + aopInReg(leftop, i, X_IDX) && (result->aop->regs[XL_IDX] < 0 || result->aop->regs[XL_IDX] >= i) && (result->aop->regs[XH_IDX] < 0 || result->aop->regs[XH_IDX] >= i) && (aopInReg(leftop, i, XL_IDX) || aopInReg(leftop, i + 1, XH_IDX) || aopInReg(leftop, i, XH_IDX) && aopInReg(leftop, i + 1, XL_IDX)))) + { + genMove_o (ASMOP_X, 0, leftop, i, 2, a_free, TRUE, FALSE); + if (i == size - 2 && rightop->type == AOP_LIT && byteOfVal (rightop->aopu.aop_lit, i) <= 2 && !byteOfVal (rightop->aopu.aop_lit, i + 1)) + 
for (j = 0; j < byteOfVal (rightop->aopu.aop_lit, i); j++) + emit3w (A_INCW, ASMOP_X, 0); + else + { + emit2 ("addw", "x, %s", aopGet2 (rightop, i)); + cost (3 + (rightop->type == AOP_DIR), 2); + } + + genMove_o (result->aop, i, ASMOP_X, 0, 2, a_free, TRUE, FALSE); + if (aopInReg (result->aop, i, A_IDX) || aopInReg (result->aop, i + 1, A_IDX)) + result_in_a = TRUE; + started = TRUE; + i += 2; + } + else if (!started && i + 1 < size && (y_free || aopInReg(leftop, i, Y_IDX) && regDead (Y_IDX, ic)) && + (aopOnStack (result->aop, i, 2) || result->aop->type == AOP_DIR) && + (aopOnStack (leftop, i, 2) || aopInReg(leftop, i, Y_IDX) || leftop->type == AOP_DIR) && + (aopOnStackNotExt (rightop, i, 2) || rightop->type == AOP_LIT || rightop->type == AOP_IMMD || rightop->type == AOP_DIR && i + 1 < rightop->size)) + { + genMove_o (ASMOP_Y, 0, leftop, i, 2, a_free, TRUE, FALSE); + if (i == size - 2 && rightop->type == AOP_LIT && byteOfVal (rightop->aopu.aop_lit, i) <= 2 && !byteOfVal (rightop->aopu.aop_lit, i + 1)) + for (j = 0; j < byteOfVal (rightop->aopu.aop_lit, i); j++) + emit3w (A_INCW, ASMOP_Y, 0); + else + { + emit2 ("addw", "y, %s", aopGet2 (rightop, i)); + cost (4 - aopOnStack (rightop, i, 2), 2); + } + genMove_o (result->aop, i, ASMOP_Y, 0, 2, a_free, TRUE, FALSE); + if (aopInReg (result->aop, i, A_IDX) || aopInReg (result->aop, i + 1, A_IDX)) + result_in_a = TRUE; + started = TRUE; + i += 2; + } + else if (!started && rightop->type == AOP_LIT && + (aopInReg (leftop, i, XH_IDX) && aopInReg (result->aop, i, XH_IDX) || aopInReg (leftop, i, YH_IDX) && aopInReg (result->aop, i, YH_IDX))) + { + emit2 ("addw", "%s, #%d", aopInReg (leftop, i, YH_IDX) ? "y" : "x", byteOfVal (rightop->aopu.aop_lit, i) << 8); + cost (3 + aopInReg (leftop, i, YH_IDX), 2); + started = TRUE; + i++; + } + else if (!started && i == size - 1 && rightop->type == AOP_LIT && // For yl, we only save a cycle compared to the normal way. 
+ (aopInReg (leftop, i, XL_IDX) && aopInReg (result->aop, i, XL_IDX) && xh_free || aopInReg (leftop, i, YL_IDX) && aopInReg (result->aop, i, YL_IDX) && yh_free)) + { + emit2 ("addw", "%s, #%d", aopInReg (leftop, i, YL_IDX) ? "y" : "x", byteOfVal (rightop->aopu.aop_lit, i)); + cost (3 + aopInReg (leftop, i, YL_IDX), 2); + started = TRUE; + i++; + } + else if (!started && i == size - 1 && aopOnStackNotExt (rightop, i, 1) && + (aopInReg (leftop, i, XL_IDX) && aopInReg (result->aop, i, XL_IDX) && xh_free || aopInReg (leftop, i, YL_IDX) && aopInReg (result->aop, i, YL_IDX) && yh_free)) + { + emit2 ("addw", "%s, (%d, sp)", aopInReg (leftop, i, YL_IDX) ? "y" : "x", rightop->aopu.bytes[i].byteu.stk + G.stack.pushed - 1); + cost (3, 2); + started = TRUE; + i++; + } + else if (started && i == size - 2 && (aopInReg (result->aop, i, X_IDX) || aopInReg (result->aop, i, Y_IDX)) && + (aopOnStackNotExt (leftop, i, 2) || leftop->type == AOP_DIR) && + (aopOnStackNotExt (rightop, i, 2) || rightop->type == AOP_LIT || rightop->type == AOP_IMMD || rightop->type == AOP_DIR)) + { + bool x = aopInReg (result->aop, i, X_IDX); + symbol *skiplbl = 0; + if (!regalloc_dry_run) + skiplbl = newiTempLabel (NULL); + genMove_o (result->aop, i, leftop, i, 2, a_free, TRUE, FALSE); + if (skiplbl) + emit2 ("jrnc", "!tlabel", labelKey2num (skiplbl->key)); + cost (2, 1); // Cycle cost 1: jump, incw together take 2 cycles. + emit3w_o (A_INCW, result->aop, i, 0, 0); + emitLabel (skiplbl); + if (!aopIsLitVal (rightop, i, 2, 0)) + { + emit2 ("addw", x ? "x, %s" : "y, %s", aopGet2 (rightop, i)); + cost ((x || aopOnStack (rightop, i, 2)) ? 3 : 4, 2); + } + i += 2; + } + else if (aopInReg (rightop, i, A_IDX)) //todo: Implement handling of right operands that can't be directly added to a. 
+ { + if (!regalloc_dry_run) + wassertl (0, "Unimplemented addition operand."); + cost (180, 180); + i++; + } + else + { + if (pushed_a && leftop->regs[A_IDX] == i && regDead (A_IDX, ic)) + { + pop (ASMOP_A, 0, 1); + pushed_a = FALSE; + } + else if (!a_free) + { + push (ASMOP_A, 0, 1); + pushed_a = TRUE; + result_in_a = FALSE; + } + + if (leftop->regs[A_IDX] == i && pushed_a) + { + emit2 ("ld", "a, (1, sp)"); + cost (2, 1); + } + else + cheapMove (ASMOP_A, 0, leftop, i, FALSE); + + if (!started && aopIsLitVal (rightop, i, 1, 0)) + ; // Skip over this byte. + // We can use inc / dec only for the only, top non-zero byte, since it neither takes into account an existing carry nor does it update the carry. + else if (!started && i == size - 1 && (aopIsLitVal (rightop, i, 1, 1) || aopIsLitVal (rightop, i, 1, 255))) + { + emit3 (aopIsLitVal (rightop, i, 1, 1) ? A_INC : A_DEC, ASMOP_A, 0); + started = true; + } + else if (aopInReg (rightop, i, XL_IDX) || aopInReg (rightop, i, XH_IDX) || aopInReg (rightop, i, YL_IDX) || aopInReg (rightop, i, YH_IDX)) + { + int right_offset; + const asmop *right_stacked; + wassert(right_stacked = stack_aop (rightop, i, &right_offset)); + emit2 (started ? "adc" : "add", "a, (%d, sp)", right_offset); + pop (right_stacked, 0, 2); + started = true; + } + else + { + emit3_o (started ? A_ADC : A_ADD, ASMOP_A, 0, i < rightop->size ? 
rightop : ASMOP_ZERO, i); + started = true; + } + + cheapMove (result->aop, i, ASMOP_A, 0, FALSE); + if (aopInReg (result->aop, i, A_IDX)) + result_in_a = TRUE; + + i++; + } + } + + if (pushed_a && !result_in_a) + pop (ASMOP_A, 0, 1); + else if (pushed_a) + adjustStack (1, FALSE, FALSE, FALSE); + + emitLabel (endlbl); + + freeAsmop (right); + freeAsmop (left); + freeAsmop (result); +} + +/*-----------------------------------------------------------------*/ +/* genMult - generates code for multiplication */ +/*-----------------------------------------------------------------*/ +static void +genMultLit (const iCode *ic) +{ + operand *result = IC_RESULT (ic); + operand *left = IC_LEFT (ic); + operand *right = IC_RIGHT (ic); + asmop *add_aop; + + D (emit2 ("; genMultLit", "")); + + aopOp (IC_LEFT (ic), ic); + aopOp (IC_RIGHT (ic), ic); + aopOp (IC_RESULT (ic), ic); + + if (left->aop->type == AOP_LIT) + { + operand *tmp = left; + left = right; + right = tmp; + } + + wassert (right->aop->type == AOP_LIT); + + add_aop = aopOnStackNotExt (left->aop, 0, 2) ? left->aop : 0; + if(!regDead (X_IDX, ic)) + push (ASMOP_X, 0, 2); + genMove (ASMOP_X, left->aop, regDead (A_IDX, ic), TRUE, regDead (Y_IDX, ic)); + if (!add_aop && isLiteralBit (byteOfVal (right->aop->aopu.aop_lit, 0)) < 0) + push (ASMOP_X, 0, 2); + + /* Generate a sequence of shifts, additions and subtractions based on the canonical signed digit representation of the literal operand */ + { + unsigned long long add, sub; + int topbit, nonzero; + + wassert(!csdOfVal (&topbit, &nonzero, &add, &sub, right->aop->aopu.aop_lit)); + + // If the leading digits of the cse are 1 0 -1 we can use 0 1 1 instead to reduce the number of shifts. 
+ if (topbit >= 2 && (add & (1ull << topbit)) && (sub & (1ull << (topbit - 2)))) + { + add = (add & ~(1u << topbit)) | (3u << (topbit - 2)); + sub &= ~(1u << (topbit - 1)); + topbit--; + } + + for (int bit = topbit - 1; bit >= 0; bit--) + { + emit3w (A_SLLW, ASMOP_X, 0); + if ((add | sub) & (1ull << bit)) + { + emit2 (add & (1ull << bit) ? "addw" : "subw" , "x, %s", add_aop ? aopGet (add_aop, 1) : "(1, sp)"); + cost (3, 2); + } + } + } + + if (!add_aop && isLiteralBit (byteOfVal (right->aop->aopu.aop_lit, 0)) < 0) + adjustStack (2, regDead (A_IDX, ic), FALSE, regDead (Y_IDX, ic)); + genMove (result->aop, ASMOP_X, regDead (A_IDX, ic), TRUE, regDead (Y_IDX, ic)); + if (regDead (XL_IDX, ic) ^ regDead (XH_IDX, ic)) + { + if (!regalloc_dry_run) + wassert (0); + cost (100, 100); + } + if(!regDead (X_IDX, ic)) + pop (ASMOP_X, 0, 2); + + freeAsmop (right); + freeAsmop (left); + freeAsmop (result); +} + +/*-----------------------------------------------------------------*/ +/* genMult - generates code for multiplication */ +/*-----------------------------------------------------------------*/ +static void +genMult (const iCode *ic) +{ + operand *result = IC_RESULT (ic); + operand *left = IC_LEFT (ic); + operand *right = IC_RIGHT (ic); + bool use_y; + + D (emit2 ("; genMult", "")); + + aopOp (IC_LEFT (ic), ic); + aopOp (IC_RIGHT (ic), ic); + aopOp (IC_RESULT (ic), ic); + + if ((left->aop->size == 2 || right->aop->size == 2) && result->aop->size == 2 && (left->aop->type == AOP_LIT || right->aop->type == AOP_LIT) || + // Some multiplications by powers of two originating from pointer additions reach here and are more efficiently done by genMultLit(). 
+ (aopInReg (result->aop, 0, X_IDX) || (optimize.codeSpeed || !regDead (A_IDX, ic)) && aopInReg (result->aop, 0, Y_IDX)) && + result->aop->size == 2 && left->aop->size == 1 && right->aop->type == AOP_LIT && + (aopIsLitVal (right->aop, 0, 1, 4) || (optimize.codeSpeed || !regDead (A_IDX, ic)) && aopIsLitVal (right->aop, 0, 1, 8))) + { + freeAsmop (right); + freeAsmop (left); + freeAsmop (result); + genMultLit (ic); + return; + } + + if (left->aop->size > 1 || right->aop->size > 1 || result->aop->size > 2) + wassertl (0, "Large multiplication is handled through support function calls."); + + /* Swap if left is literal or right is in A. */ + if (aopInReg (left->aop, 0, A_IDX) || aopInReg (right->aop, 0, XL_IDX) || aopInReg (right->aop, 0, YL_IDX) && !aopInReg (result->aop, 0, X_IDX)) // todo: Swap in more cases when right in reg, left not. + { + operand *t = right; + right = left; + left = t; + } + + use_y = aopInReg (result->aop, 0, Y_IDX) || aopInReg (left->aop, 0, YL_IDX) && !aopInReg (result->aop, 0, X_IDX); + + if (!regDead (use_y ? Y_IDX : X_IDX, ic)) + push (use_y ? ASMOP_Y : ASMOP_X, 0, 2); + if (!regDead (A_IDX, ic)) + push (ASMOP_A, 0, 1); + + cheapMove (use_y ? ASMOP_Y : ASMOP_X, 0, left->aop, 0, aopInReg (right->aop, 0, A_IDX)); + cheapMove (ASMOP_A, 0, right->aop, 0, TRUE); + + emit2 ("mul", use_y ? "y, a" : "x, a"); + cost (1 + use_y, 4); + + genMove (result->aop, use_y ? ASMOP_Y : ASMOP_X, TRUE, !use_y || regDead (X_IDX, ic), use_y || regDead (Y_IDX, ic)); + + if (!regDead (A_IDX, ic)) + pop (ASMOP_A, 0, 1); + if (!regDead (use_y ? Y_IDX : X_IDX, ic)) + { + if (result->aop->regs[use_y ? YH_IDX : XH_IDX] >= 0) + { + adjustStack (1, FALSE, FALSE, FALSE); + swap_to_a (use_y ? YL_IDX : XL_IDX); + pop (ASMOP_A, 0, 1); + swap_from_a(use_y ? YL_IDX : XL_IDX); + } + else if (result->aop->regs[use_y ? YL_IDX : XL_IDX] >= 0) + { + swap_to_a (use_y ? YH_IDX : XH_IDX); + pop (ASMOP_A, 0, 1); + swap_from_a(use_y ? 
YH_IDX : XH_IDX); + adjustStack (1, FALSE, FALSE, FALSE); + } + else + pop (use_y ? ASMOP_Y : ASMOP_X, 0, 2); + } + + freeAsmop (right); + freeAsmop (left); + freeAsmop (result); +} + +/*-----------------------------------------------------------------*/ +/* genDivMod2 - generates code for unsigned division */ +/* any operands and results of up to 2 bytes */ +/*-----------------------------------------------------------------*/ +static void +genDivMod2 (const iCode *ic) +{ +#if 0 + D (emit2 ("; genDivMod2", "")); +#endif + + operand *result = IC_RESULT (ic); + operand *left = IC_LEFT (ic); + operand *right = IC_RIGHT (ic); + + if (!regDead (X_IDX, ic)) + push (ASMOP_X, 0, 2); + if (!regDead (Y_IDX, ic)) + push (ASMOP_Y, 0, 2); + + if (stm8_extend_stack) + { + if (left->aop->regs[XL_IDX] >= 0 || left->aop->regs[XH_IDX] >= 0 || right->aop->regs[A_IDX] >= 1) + { + wassert (regalloc_dry_run); + cost (180, 180); + } + + if (!regDead (A_IDX, ic)) + push (ASMOP_A, 0, 1); + + cheapMove (ASMOP_A, 0, right->aop, 0, TRUE); + push (ASMOP_A, 0, 1); + cheapMove (ASMOP_A, 0, right->aop, 1, TRUE); + push (ASMOP_A, 0, 1); + if (left->aop->regs[XL_IDX] >= 0 || left->aop->regs[XH_IDX] >= 0) + { + wassert (regalloc_dry_run); + cost (180, 180); + } + genMove (ASMOP_X, left->aop, TRUE, TRUE, FALSE); + pop (ASMOP_Y, 0, 2); + + if (!regDead (A_IDX, ic)) + pop (ASMOP_A, 0, 1); + } + else if (aopRS (left->aop) && left->aop->size >= 2 && aopRS (right->aop) && right->aop->size >= 2) + { + int i; + struct asmop cop; + cop.type = AOP_REGSTK; + cop.size = 4; + for (i = A_IDX; i <= C_IDX; i++) + cop.regs[i] = (left->aop->regs[i] >= right->aop->regs[i] + 2 ? 
left->aop->regs[i] : right->aop->regs[i] + 2); + cop.aopu.bytes[0] = left->aop->aopu.bytes[0]; + cop.aopu.bytes[1] = left->aop->aopu.bytes[1]; + cop.aopu.bytes[2] = right->aop->aopu.bytes[0]; + cop.aopu.bytes[3] = right->aop->aopu.bytes[1]; + genMove (ASMOP_XY, &cop, regDead (A_IDX, ic), TRUE, TRUE); + } + else if (aopRS (right->aop)) + { + if (left->aop->regs[YL_IDX] >= 0 || left->aop->regs[YH_IDX] >= 0) + { + wassert (regalloc_dry_run); + cost (180, 180); + } + genMove (ASMOP_Y, right->aop, regDead (A_IDX, ic), TRUE, TRUE); + genMove (ASMOP_X, left->aop, regDead (A_IDX, ic), TRUE, FALSE); + } + else + { + if (right->aop->regs[XL_IDX] >= 0 || right->aop->regs[XH_IDX] >= 0) + { + wassert (regalloc_dry_run); + cost (180, 180); + } + genMove (ASMOP_X, left->aop, regDead (A_IDX, ic), TRUE, TRUE); + genMove (ASMOP_Y, right->aop, regDead (A_IDX, ic), FALSE, TRUE); + } + + emit2 ("divw", "x, y"); + cost (1, 17); + + if (!stm8_extend_stack) + genMove (result->aop, ic->op == '/' ? ASMOP_X : ASMOP_Y, regDead (A_IDX, ic), TRUE, TRUE); + + if (ic->op == '%' && stm8_extend_stack) + { + emit2 ("exgw", "x, y"); + cost (1, 1); + } + + if (!regDead (Y_IDX, ic)) + { + if (result->aop->regs[YH_IDX] >= 0) + { + adjustStack (1, FALSE, FALSE, FALSE); + swap_to_a (YL_IDX); + pop (ASMOP_A, 0, 1); + swap_from_a(YL_IDX); + } + else if (result->aop->regs[YL_IDX] >= 0) + { + swap_to_a (YH_IDX); + pop (ASMOP_A, 0, 1); + swap_from_a(YH_IDX); + adjustStack (1, FALSE, FALSE, FALSE); + } + else + pop (ASMOP_Y, 0, 2); + } + + if (stm8_extend_stack) + genMove (result->aop, ASMOP_X, regDead (A_IDX, ic), TRUE, FALSE); + + if (!regDead (X_IDX, ic)) + { + if (result->aop->regs[XH_IDX] >= 0) + { + adjustStack (1, FALSE, FALSE, FALSE); + swap_to_a (XL_IDX); + pop (ASMOP_A, 0, 1); + swap_from_a(XL_IDX); + } + else if (result->aop->regs[XL_IDX] >= 0) + { + swap_to_a (XH_IDX); + pop (ASMOP_A, 0, 1); + swap_from_a(XH_IDX); + adjustStack (1, FALSE, FALSE, FALSE); + } + else + pop (ASMOP_X, 0, 2); + } +} + 
+/*-----------------------------------------------------------------*/ +/* genDivMod1 - generates code for unsigned division */ +/* left operand up to 2 bytes */ +/* right operand 1 byte */ +/* result up to 2 bytes for division, 1 byte for modulo */ +/*-----------------------------------------------------------------*/ +static void +genDivMod1 (const iCode *ic) +{ + operand *result = IC_RESULT (ic); + operand *left = IC_LEFT (ic); + operand *right = IC_RIGHT (ic); + bool use_y; + + use_y = ic->op == '/' && aopInReg (result->aop, 0, Y_IDX) || aopInReg (left->aop, 0, YL_IDX) && !(ic->op == '/' && aopInReg (result->aop, 0, X_IDX)); + + if (!regDead (use_y ? Y_IDX : X_IDX, ic)) + push (use_y ? ASMOP_Y : ASMOP_X, 0, 2); + if (!regDead (A_IDX, ic)) + push (ASMOP_A, 0, 1); + + if (!use_y && aopInReg (right->aop, 0, XL_IDX) && aopOnStack (left->aop, 0, 1)) + { + cheapMove (ASMOP_X, 1, ASMOP_ZERO, 0, false); + cheapMove (ASMOP_A, 0, left->aop, 0, false); + emit2 ("exg", "a, xl"); + cost (1, 1); + } + else if (aopInReg (right->aop, 0, use_y ? YL_IDX : XL_IDX) || aopInReg (right->aop, 0, use_y ? YH_IDX : XH_IDX)) + { + cheapMove (ASMOP_A, 0, right->aop, 0, false); + genMove_o (use_y ? ASMOP_Y : ASMOP_X, 0, left->aop, 0, 2, false, false, false); + } + else + { + genMove_o (use_y ? ASMOP_Y : ASMOP_X, 0, left->aop, 0, 2, right->aop->regs[A_IDX] < 0, false, false); + cheapMove (ASMOP_A, 0, right->aop, 0, false); + } + + emit2 ("div", use_y ? "y, a" : "x, a"); + cost (1 + use_y, 17); + + genMove_o (result->aop, 0, ic->op == '/' ? (use_y ? ASMOP_Y : ASMOP_X) : ASMOP_A, 0, result->aop->size, TRUE, !use_y || regDead(X_IDX, ic), use_y || regDead(Y_IDX, ic)); + + if (!regDead (A_IDX, ic)) + pop (ASMOP_A, 0, 1); + if (!regDead (use_y ? Y_IDX : X_IDX, ic)) + { + if (result->aop->regs[use_y ? YH_IDX : XH_IDX] >= 0) + { + adjustStack (1, FALSE, FALSE, FALSE); + swap_to_a (use_y ? YL_IDX : XL_IDX); + pop (ASMOP_A, 0, 1); + swap_from_a(use_y ? 
YL_IDX : XL_IDX); + } + else if (result->aop->regs[use_y ? YL_IDX : XL_IDX] >= 0) + { + swap_to_a (use_y ? YH_IDX : XH_IDX); + pop (ASMOP_A, 0, 1); + swap_from_a(use_y ? YH_IDX : XH_IDX); + adjustStack (1, FALSE, FALSE, FALSE); + } + else + pop (use_y ? ASMOP_Y : ASMOP_X, 0, 2); + } +} + +/*-----------------------------------------------------------------*/ +/* genDivMod - generates code for unsigned division */ +/*-----------------------------------------------------------------*/ +static void +genDivMod (const iCode *ic) +{ + operand *result = IC_RESULT (ic); + operand *left = IC_LEFT (ic); + operand *right = IC_RIGHT (ic); + + D (emit2 ("; genDivMod", "")); + + aopOp (IC_LEFT (ic), ic); + aopOp (IC_RIGHT (ic), ic); + aopOp (IC_RESULT (ic), ic); + + if (result->aop->size <= (ic->op == '/' ? 2 : 1) && left->aop->size <= 2 && right->aop->size <= 1) + genDivMod1(ic); + else + genDivMod2(ic); + + freeAsmop (right); + freeAsmop (left); + freeAsmop (result); +} + +/*-----------------------------------------------------------------*/ +/* genMinus - generates code for minus */ +/*-----------------------------------------------------------------*/ +static void +genMinus (const iCode *ic) +{ + operand *result = IC_RESULT (ic); + operand *left = IC_LEFT (ic); + operand *right = IC_RIGHT (ic); + + D (emit2 ("; genMinus", "")); + + aopOp (IC_LEFT (ic), ic); + aopOp (IC_RIGHT (ic), ic); + aopOp (IC_RESULT (ic), ic); + + genSub (ic, result->aop, left->aop, right->aop); + + freeAsmop (right); + freeAsmop (left); + freeAsmop (result); +} + +/*-----------------------------------------------------------------*/ +/* exchangedCmp : returns the opcode need if the two operands are */ +/* exchanged in a comparison */ +/*-----------------------------------------------------------------*/ +static int +exchangedCmp (int opcode) +{ + switch (opcode) + { + case '<': + return '>'; + case '>': + return '<'; + case LE_OP: + return GE_OP; + case GE_OP: + return LE_OP; + case NE_OP: + return 
NE_OP; + case EQ_OP: + return EQ_OP; + default: + werror (E_INTERNAL_ERROR, __FILE__, __LINE__, "opcode not a comparison"); + } + return EQ_OP; /* shouldn't happen, but need to return something */ +} + +/*------------------------------------------------------------------*/ +/* branchInstCmp : returns the conditional branch instruction that */ +/* will branch if the comparison is true */ +/*------------------------------------------------------------------*/ +static char * +branchInstCmp (int opcode, int sign, bool negated) +{ + if (negated) + switch (opcode) + { + case '<': + opcode = GE_OP; + break; + case '>': + opcode = LE_OP; + break; + case LE_OP: + opcode = '>'; + break; + case GE_OP: + opcode = '<'; + break; + case NE_OP: + opcode = EQ_OP; + break; + case EQ_OP: + opcode = NE_OP; + break; + default: + werror (E_INTERNAL_ERROR, __FILE__, __LINE__, "opcode not a comparison"); + } + + switch (opcode) + { + case '<': + if (sign) + return "jrslt"; + else + return "jrc"; + case '>': + if (sign) + return "jrsgt"; + else + return "jrugt"; + case LE_OP: + if (sign) + return "jrsle"; + else + return "jrule"; + case GE_OP: + if (sign) + return "jrsge"; + else + return "jrnc"; + case NE_OP: + return "jrne"; + case EQ_OP: + return "jreq"; + default: + werror (E_INTERNAL_ERROR, __FILE__, __LINE__, "opcode not a comparison"); + } + return "brn"; +} + +/*------------------------------------------------------------------*/ +/* genCmp :- greater or less than (and maybe with equal) comparison */ +/* Handles cases where the decision can be made based on top bytes. 
*/ +/*------------------------------------------------------------------*/ +static int +genCmpTop (operand *left, operand *right, operand *result, const iCode *ic) +{ + sym_link *letype, *retype; + int sign, opcode; + int size; + int ret = 0; + + D (emit2 ("; genCmpTop", "")); + + if (left->aop->type != AOP_LIT && right->aop->type != AOP_LIT) + return 0; + + opcode = ic->op; + sign = 0; + if (IS_SPEC (operandType (left)) && IS_SPEC (operandType (right))) + { + letype = getSpec (operandType (left)); + retype = getSpec (operandType (right)); + sign = !(SPEC_USIGN (letype) | SPEC_USIGN (retype)); + } + size = max (left->aop->size, right->aop->size); + + if (left->aop->type == AOP_LIT) + { + operand *temp = left; + wassert (right->aop->type != AOP_LIT); + left = right; + right = temp; + opcode = exchangedCmp (opcode); + } + wassert (right->aop->type == AOP_LIT); + + if ((size >= 2 && !sign && aopIsLitVal (right->aop, 0, size - 1, ~0) && aopIsLitVal (right->aop, size - 1, 1, 0x00) && opcode == '>')) + { + if (aopInReg (left->aop, size - 1, A_IDX) || aopOnStack (left->aop, size - 1, 1) || left->aop->type == AOP_DIR) + { + emit3_o (A_TNZ, left->aop, size - 1, 0, 0); + ret = 20; + } + else if (size > 2 || + (!aopInReg (left->aop, 0, X_IDX) && !aopInReg (left->aop, 0, Y_IDX) && (regDead (A_IDX, ic) || !regDead (X_IDX, ic)))) // When we can use tnzw moving to A costs more than we save by skipping a byte. 
+ { + if (!regDead (A_IDX, ic)) + push (ASMOP_A, 0, 1); + + cheapMove (ASMOP_A, 0, left->aop, size - 1, FALSE); + emit3 (A_TNZ, ASMOP_A, NULL); + ret = 20; + + if (!regDead (A_IDX, ic)) + pop (ASMOP_A, 0, 1); + } + } + else if (size >= 3 && !sign && aopIsLitVal (right->aop, 0, size - 2, ~0) && aopIsLitVal (right->aop, size - 2, 2, 0x0000) && opcode == '>') + { + if (aopInReg (left->aop, 2, X_IDX) || aopInReg (left->aop, 2, XH_IDX) && aopInReg (left->aop, 3, XL_IDX)) + emit3w (A_TNZW, ASMOP_X, NULL); + else if (aopInReg (left->aop, 2, Y_IDX) || aopInReg (left->aop, 2, YH_IDX) && aopInReg (left->aop, 3, YL_IDX)) + emit3w (A_TNZW, ASMOP_Y, NULL); + else if (regDead (X_IDX, ic) && (aopOnStackNotExt (left->aop, size - 2, 2) || left->aop->type == AOP_DIR)) + { + emit2 ("ldw", "x, %s", aopGet2 (left->aop, size - 2)); + cost (2 + (left->aop->type == AOP_DIR), 2); + } + else if (regDead (X_IDX, ic)) + { + genMove_o (ASMOP_X, 0, left->aop, size - 2, 2, regDead (A_IDX, ic), TRUE, regDead (Y_IDX, ic)); + emit3w (A_TNZW, ASMOP_X, NULL); + } + else if (size >= 2 && regDead (Y_IDX, ic) && (aopOnStackNotExt (left->aop, size - 2, 2) || left->aop->type == AOP_DIR)) + { + emit2 ("ldw", "y, %s", aopGet2 (left->aop, size - 2)); + cost (2 + 2 * (left->aop->type == AOP_DIR), 2); + } + else if (regDead (Y_IDX, ic)) + { + genMove_o (ASMOP_Y, 0, left->aop, size - 2, 2, regDead (A_IDX, ic), regDead (X_IDX, ic), TRUE); + emit3w (A_TNZW, ASMOP_Y, NULL); + } + else + { + push (ASMOP_X, 0, 2); + genMove_o (ASMOP_X, 0, left->aop, size - 2, 2, regDead (A_IDX, ic), TRUE, regDead (Y_IDX, ic)); + emit3w (A_TNZW, ASMOP_X, NULL); + pop (ASMOP_X, 0, 2); + } + ret = 20; + } + else if (sign && aopIsLitVal (right->aop, 0, size, 0) && opcode == '<') + { + if (aopInReg (left->aop, size - 1, A_IDX) || aopOnStack (left->aop, size - 1, 1) || left->aop->type == AOP_DIR) + emit3_o (A_TNZ, left->aop, size - 1, 0, 0); + else if (size >= 2 && aopInReg (left->aop, size - 2, X_IDX)) + emit3w (A_TNZW, ASMOP_X, NULL); + 
else if (size >= 2 && aopInReg (left->aop, size - 2, Y_IDX)) + emit3w (A_TNZW, ASMOP_Y, NULL); + else if (size >= 2 && regDead (X_IDX, ic)) + { + genMove_o (ASMOP_X, 0, left->aop, size - 2, 2, regDead (A_IDX, ic), TRUE, regDead (Y_IDX, ic)); + emit3w (A_TNZW, ASMOP_X, NULL); + } + else if (size >= 2 && regDead (Y_IDX, ic) && (aopOnStackNotExt (left->aop, size - 2, 2) || left->aop->type == AOP_DIR)) + { + emit2 ("ldw", "y, %s", aopGet2 (left->aop, size - 2)); + cost (2 + 2 * (left->aop->type == AOP_DIR), 2); + } + else if (size >= 2 && regDead (Y_IDX, ic)) + { + genMove_o (ASMOP_Y, 0, left->aop, size - 2, 2, regDead (A_IDX, ic), regDead (X_IDX, ic), TRUE); + emit3w (A_TNZW, ASMOP_Y, NULL); + } + else + { + if (!regDead (A_IDX, ic)) + push (ASMOP_A, 0, 1); + + cheapMove (ASMOP_A, 0, left->aop, size - 1, FALSE); + emit3 (A_TNZ, ASMOP_A, NULL); + + if (!regDead (A_IDX, ic)) + pop (ASMOP_A, 0, 1); + } + ret = 10; + } + + return ret; +} + +/*------------------------------------------------------------------*/ +/* genCmp :- greater or less than (and maybe with equal) comparison */ +/*------------------------------------------------------------------*/ +static void +genCmp (const iCode *ic, iCode *ifx) +{ + operand *result = IC_RESULT (ic); + operand *left = IC_LEFT (ic); + operand *right = IC_RIGHT (ic); + sym_link *letype, *retype; + int sign, opcode; + int size, i; + bool exchange = FALSE; + int special = 0; + + D (emit2 ("; genCmp", "")); + + opcode = ic->op; + sign = 0; + if (IS_SPEC (operandType (left)) && IS_SPEC (operandType (right))) + { + letype = getSpec (operandType (left)); + retype = getSpec (operandType (right)); + sign = !(SPEC_USIGN (letype) | SPEC_USIGN (retype)); + } + + aopOp (IC_LEFT (ic), ic); + aopOp (IC_RIGHT (ic), ic); + aopOp (IC_RESULT (ic), ic); + + size = max (left->aop->size, right->aop->size); + + /* Prefer literal operand on right */ + if (left->aop->type == AOP_LIT || + right->aop->type != AOP_LIT && left->aop->type == AOP_DIR || + 
(aopInReg (right->aop, 0, A_IDX) || aopInReg (right->aop, 0, X_IDX) || aopInReg (right->aop, 0, Y_IDX)) && left->aop->type == AOP_STK) + exchange = TRUE; + + /* Right operand is a special literal */ + if ((special = genCmpTop(left, right, result, ic)) > 0) + goto _genCmp_1; + + /* Cannot do multibyte signed comparison, except for 2-byte using cpw */ + if (size > 1 && !(size == 2 && (right->aop->type == AOP_LIT || right->aop->type == AOP_DIR || right->aop->type == AOP_STK))) + { + if (exchange && (opcode == '<' || opcode == GE_OP)) + exchange = FALSE; + if (!exchange && (opcode == '>' || opcode == LE_OP)) + exchange = TRUE; + } + + if (exchange) + { + operand *temp = left; + left = right; + right = temp; + opcode = exchangedCmp (opcode); + } + + if (size == 1 && + (right->aop->type == AOP_LIT || right->aop->type == AOP_DIR || right->aop->type == AOP_STK) && + aopInReg (left->aop, 0, A_IDX)) + emit3 (A_CP, ASMOP_A, right->aop); + else if (size == 2 && (right->aop->type == AOP_LIT || right->aop->type == AOP_DIR || right->aop->type == AOP_STK)) + { + if (aopInReg (left->aop, 0, Y_IDX) && right->aop->type == AOP_STK) + { + if (regDead (X_IDX, ic) && regDead (Y_IDX, ic)) + { + emit2 ("ldw", "x, y"); + emit2 ("cpw", "x, %s", aopGet2 (right->aop, 0)); + cost (3, 3); + } + else + { + emit2 ("exgw", "x, y"); + emit2 ("cpw", "x, %s", aopGet2 (right->aop, 0)); + emit2 ("exgw", "x, y"); + cost (4, 4); + } + } + else + { + bool save_x = !regDead (X_IDX, ic) && !aopInReg (left->aop, 0, X_IDX) && !aopInReg (left->aop, 0, Y_IDX); + if (save_x) + push (ASMOP_X, 0, 2); + + if (!aopInReg (left->aop, 0, Y_IDX)) + genMove (ASMOP_X, left->aop, regDead (A_IDX, ic), TRUE, regDead (Y_IDX, ic)); + + emit2 ("cpw", aopInReg (left->aop, 0, Y_IDX) ? 
"y, %s" : "x, %s", aopGet2 (right->aop, 0)); + cost (3 + aopInReg (left->aop, 0, Y_IDX), 2); + + if (save_x) + pop (ASMOP_X, 0, 2); + } + } + else + { + bool pushed_a = false; + bool started = false; + + for (i = 0; i < size; i++) + if (i && aopInReg (left->aop, i, A_IDX) || aopInReg (right->aop, i, A_IDX)) + { + push (ASMOP_A, 0, 1); + pushed_a = TRUE; + break; + } + + for (i = 0, started = false; i < size; i++) + { + const asmop *right_stacked = NULL; + int right_offset; + + if (!started && aopIsLitVal (right->aop, i, 2, 0) && (i + 1 < size)) // Skip over trailing 0x0000. + { + i++; + continue; + } + + if (!started && (aopInReg (left->aop, i, X_IDX) || aopInReg (left->aop, i, Y_IDX) && !aopOnStack(right->aop, i, 2)) && + (right->aop->type == AOP_LIT || right->aop->type == AOP_DIR || aopOnStack(right->aop, i, 2))) + { + bool x = aopInReg (left->aop, i, X_IDX); + emit2 ("cpw", x ? "x, %s" : "y, %s", aopGet2 (right->aop, i)); + cost ((x ? 3 : 4) - aopOnStack(right->aop, i, 2), 2); + i++; + started = true; + continue; + } + else if (!started && i + 1 < size && regDead (X_IDX, ic) && left->aop->regs[XL_IDX] < i && left->aop->regs[XH_IDX] < i && right->aop->regs[XL_IDX] < i && right->aop->regs[XH_IDX] < i && + (left->aop->type == AOP_LIT || left->aop->type == AOP_DIR || aopOnStack(left->aop, i, 2)) && + (right->aop->type == AOP_LIT || right->aop->type == AOP_DIR || aopOnStack(right->aop, i, 2))) + { + genMove_o (ASMOP_X, 0, left->aop, i, 2, regDead (A_IDX, ic) && left->aop->regs[A_IDX] <= i && right->aop->regs[A_IDX] < i, TRUE, FALSE); + emit2 ("cpw", "x, %s", aopGet2 (right->aop, i)); + cost (3 - aopOnStack(right->aop, i, 2), 2); + i++; + started = true; + continue; + } + + if (!regDead (A_IDX, ic) && !pushed_a && !aopInReg (left->aop, i, A_IDX)) + { + push (ASMOP_A, 0, 1); + pushed_a = TRUE; + } + + if (i && aopInReg (left->aop, i, A_IDX) && regDead (A_IDX, ic) && pushed_a) + { + pop (ASMOP_A, 0, 1); + pushed_a = FALSE; + } + else if (i && aopInReg (left->aop, i, 
A_IDX) && pushed_a) + { + emit2 ("ld", "a, (1, sp)"); + cost (2, 1); + } + else + cheapMove (ASMOP_A, 0, left->aop, i, FALSE); + + right_stacked = stack_aop (right->aop, i, &right_offset); + + if (right_stacked || aopInReg (right->aop, i, A_IDX)) + { + emit2 (started ? "sbc" : "cp", "a, (%d, sp)", right_stacked ? right_offset : 1); + cost (2, 1); + } + else + emit3_o (started ? A_SBC : A_CP, ASMOP_A, 0, right->aop, i); + started = true; + + if (right_stacked) + pop (right_stacked, 0, 2); + } + + if (!regDead (A_IDX, ic) && pushed_a) + pop (ASMOP_A, 0, 1); + else if (pushed_a) + adjustStack (1, FALSE, FALSE, FALSE); + } + +_genCmp_1: + if (!special && !strcmp(branchInstCmp (opcode, sign, FALSE), "jrc") && !ifx && (aopInReg (result->aop, 0, A_IDX) || regDead (A_IDX, ic))) + { + emit3 (A_CLR, ASMOP_A, 0); + emit3 (A_RLC, ASMOP_A, 0); + cheapMove (result->aop, 0, ASMOP_A, 0, FALSE); + } + else if (!ifx) + { + symbol *tlbl1 = (regalloc_dry_run ? 0 : newiTempLabel (NULL)); + symbol *tlbl2 = (regalloc_dry_run ? 0 : newiTempLabel (NULL)); + if (tlbl1) + switch (special) + { + case 10: /* special cases by genCmpTop () */ + emit2 ("jrmi", "%05d$", labelKey2num (tlbl1->key)); + break; + case 20: /* special cases by genCmpTop () */ + emit2 ("jrne", "%05d$", labelKey2num (tlbl1->key)); + break; + default: /* normal cases */ + emit2 (branchInstCmp (opcode, sign, FALSE), "%05d$", labelKey2num (tlbl1->key)); + break; + } + cost (2, 0); + cheapMove (result->aop, 0, ASMOP_ZERO, 0, !regDead (A_IDX, ic)); + emitJP (tlbl2, 1.0f); + emitLabel (tlbl1); + cheapMove (result->aop, 0, ASMOP_ONE, 0, !regDead (A_IDX, ic)); + emitLabel (tlbl2); + } + else + { + symbol *tlbl = (regalloc_dry_run ? 0 : newiTempLabel (NULL)); + if (tlbl) + switch (special) + { + case 10: /* special cases by genCmpTop () */ + emit2 (IC_TRUE (ifx) ? "jrpl" : "jrmi", "%05d$", labelKey2num (tlbl->key)); + break; + case 20: /* special cases by genCmpTop () */ + emit2 (IC_TRUE (ifx) ? 
"jreq" : "jrne", "%05d$", labelKey2num (tlbl->key)); + break; + default: /* normal cases */ + emit2 (branchInstCmp (opcode, sign, IC_TRUE (ifx) ? TRUE : FALSE), "%05d$", labelKey2num (tlbl->key)); + break; + } + cost (2, 0); + emitJP (IC_TRUE (ifx) ? IC_TRUE (ifx) : IC_FALSE (ifx), 1.0f); + emitLabel (tlbl); + if (!regalloc_dry_run) + ifx->generated = 1; + } + + freeAsmop (right); + freeAsmop (left); + freeAsmop (result); +} + +/*-----------------------------------------------------------------*/ +/* genCmpEQorNE - equal or not equal comparison */ +/*-----------------------------------------------------------------*/ +static void +genCmpEQorNE (const iCode *ic, iCode *ifx) +{ + operand *left, *right, *result; + int opcode; + int size, i; + symbol *tlbl_NE_pop = (regalloc_dry_run ? 0 : newiTempLabel (NULL)); + symbol *tlbl_NE = (regalloc_dry_run ? 0 : newiTempLabel (NULL)); + symbol *tlbl = (regalloc_dry_run ? 0 : newiTempLabel (NULL)); + bool pushed_a = FALSE, pop_a = FALSE; + int pushed; + + D (emit2 ("; genCmpEQorNE", "")); + + result = IC_RESULT (ic); + left = IC_LEFT (ic); + right = IC_RIGHT (ic); + + opcode = ic->op; + + pushed = G.stack.pushed; + + /* assign the amsops */ + aopOp (left, ic); + aopOp (right, ic); + aopOp (result, ic); + + size = max (left->aop->size, right->aop->size); + + for (i = 0; i < size;) + { + /* Prefer literal operand on right */ + if (left->aop->type == AOP_LIT || left->aop->type == AOP_IMMD || + right->aop->type != AOP_LIT && right->aop->type != AOP_IMMD && left->aop->type == AOP_DIR || + aopInReg (right->aop, i, A_IDX) && aopOnStack (left->aop, i, 1) || + (aopInReg (right->aop, i, X_IDX) || aopInReg (right->aop, i, Y_IDX)) && aopOnStack (left->aop, i, 2)) + { + operand *temp = left; + left = right; + right = temp; + } + + if (i <= size - 2 && (right->aop->type == AOP_LIT || right->aop->type == AOP_IMMD || right->aop->type == AOP_DIR || aopOnStack (right->aop, i, 2)) && !((aopInReg(left->aop, i, A_IDX) || aopInReg(left->aop, i + 1, 
A_IDX))&& pushed_a)) + { + bool x_dead = regDead (X_IDX, ic) && left->aop->regs[XL_IDX] <= i + 1 && left->aop->regs[XH_IDX] <= i + 1 && right->aop->regs[XL_IDX] <= i + 1 && right->aop->regs[XH_IDX] <= i + 1; + bool y_dead = regDead (Y_IDX, ic) && left->aop->regs[YL_IDX] <= i + 1 && left->aop->regs[YH_IDX] <= i + 1 && right->aop->regs[YL_IDX] <= i + 1 && right->aop->regs[YH_IDX] <= i + 1; + + /* Try to use flag setting from ldw */ + if((aopOnStackNotExt (left->aop, i, 2) || left->aop->type == AOP_DIR) && + right->aop->type == AOP_LIT && aopIsLitVal (right->aop, i, 2, 0x0000) && + (x_dead || y_dead)) + { + emit2 ("ldw", x_dead ? "x, %s" : "y, %s", aopGet2 (left->aop, i)); + cost (2 + (left->aop->type == AOP_DIR) * (2 - x_dead), 2); + } + else if (aopInReg (left->aop, i, Y_IDX) && aopOnStack (right->aop, i, 2)) + { + if (x_dead) + { + emit2 ("ldw", "x, y"); + emit2 ("cpw", "x, %s", aopGet2 (right->aop, i)); + cost (3, 3); + } + else + { + emit2 ("exgw", "x, y"); + emit2 ("cpw", "x, %s", aopGet2 (right->aop, i)); + emit2 ("exgw", "x, y"); + cost (4, 4); + } + } + else + { + bool cmp_y = aopInReg (left->aop, i, Y_IDX); + if (!cmp_y && !x_dead && !aopInReg (left->aop, i, X_IDX)) + push (ASMOP_X, 0, 2); + genMove_o (aopInReg (left->aop, i, Y_IDX) ? ASMOP_Y : ASMOP_X, 0, left->aop, i, 2, regDead (A_IDX, ic) && left->aop->regs[A_IDX] <= i + 1 && right->aop->regs[A_IDX] <= i + 1, TRUE, FALSE); + if (right->aop->type == AOP_LIT && aopIsLitVal (right->aop, i, 2, 0x0000)) + emit3w (A_TNZW, cmp_y ? ASMOP_Y : ASMOP_X, 0); + else if (right->aop->type == AOP_LIT && + (!cmp_y && (x_dead || !aopInReg (left->aop, i, X_IDX)) || cmp_y && regDead (Y_IDX, ic)) && + (aopIsLitVal (right->aop, i, 2, 0x0001) || aopIsLitVal (right->aop, i, 2, 0xffff))) + emit3w (aopIsLitVal (right->aop, i, 2, 0x0001) ? A_DECW : A_INCW, cmp_y ? ASMOP_Y : ASMOP_X, 0); + else + { + emit2 ("cpw", cmp_y ? 
"y, %s" : "x, %s", aopGet2 (right->aop, i)); + cost (3 + cmp_y, 2); + } + + if (!cmp_y && !x_dead && !aopInReg (left->aop, i, X_IDX)) + pop (ASMOP_X, 0, 2); + } + + i += 2; + } + else if (right->aop->type == AOP_LIT || right->aop->type == AOP_IMMD || right->aop->type == AOP_DIR || aopOnStack (right->aop, i, 1)) + { + if ((!regDead (A_IDX, ic) && !aopInReg (left->aop, i, A_IDX) || left->aop->regs[A_IDX] > i || right->aop->regs[A_IDX] > i) && !pushed_a) // Todo: Test A early instead! + { + push (ASMOP_A, 0, 1); + pushed_a = TRUE; + } + else if (aopInReg (left->aop, i, A_IDX) && pushed_a) + { + pop (ASMOP_A, 0, 1); + pushed_a = FALSE; + } + + cheapMove (ASMOP_A, 0, left->aop, i, FALSE); + + if (right->aop->type == AOP_LIT && + !(aopInReg (left->aop, i, A_IDX) && !regDead (A_IDX, ic)) && + (aopIsLitVal (right->aop, i, 1, 0x01) || aopIsLitVal (right->aop, i, 1, 0xff))) + emit3 (aopIsLitVal (right->aop, i, 1, 0x01) ? A_DEC : A_INC, ASMOP_A, 0); + else + emit3_o (A_CP, ASMOP_A, 0, right->aop, i); + + i++; + } + else + { + if (!regalloc_dry_run) + { + fprintf(stderr, "ltype %d, lsize %d, rtype %d, rsize %d\n", left->aop->type, left->aop->size, right->aop->type, right->aop->size); + wassertl (0, "Unimplemented comparison operands."); + } + cost (180, 180); + + i++; + } + + if (size == 1 && pushed_a) // Popping it here once now is cheaper than doing it in multiple places later. + { + pop (ASMOP_A, 0, 1); + pushed_a = FALSE; + } + + if (pushed_a) + { + if (tlbl_NE_pop) + emit2 ("jrne", "%05d$", labelKey2num (tlbl_NE_pop->key)); + pop_a = TRUE; + } + else if (tlbl_NE) + emit2 ("jrne", "%05d$", labelKey2num (tlbl_NE->key)); + cost (2, 2); // Cycle cost is an estimate. + } + + if (pushed_a) + pop (ASMOP_A, 0, 1); + + wassertl (result->aop->size == 1 || ifx, "Unimplemented result size."); + + if (!ifx) + { + cheapMove (result->aop, 0, opcode == EQ_OP ? 
ASMOP_ONE : ASMOP_ZERO, 0, !regDead (A_IDX, ic)); + emitJP(tlbl, 0.0f); + if (pop_a) + { + emitLabel (tlbl_NE_pop); + pop (ASMOP_A, 0, 1); + } + emitLabel (tlbl_NE); + cheapMove (result->aop, 0, opcode == NE_OP ? ASMOP_ONE : ASMOP_ZERO, 0, !regDead (A_IDX, ic)); + emitLabel (tlbl); + } + else if (IC_TRUE (ifx) && opcode == EQ_OP || IC_FALSE (ifx) && opcode == NE_OP) + { + emitJP(IC_TRUE (ifx) ? IC_TRUE (ifx) : IC_FALSE (ifx), 0.0f); + if (pop_a) + { + emitLabel (tlbl_NE_pop); + pop (ASMOP_A, 0, 1); + } + emitLabel (tlbl_NE); + if (!regalloc_dry_run) + ifx->generated = 1; + } + else + { + emitJP(tlbl, 0.0f); + if (pop_a) + { + emitLabel (tlbl_NE_pop); + pop (ASMOP_A, 0, 1); + } + emitLabel (tlbl_NE); + emitJP(IC_TRUE (ifx) ? IC_TRUE (ifx) : IC_FALSE (ifx), 0.0f); + emitLabel (tlbl); + if (!regalloc_dry_run) + ifx->generated = 1; + } + + G.stack.pushed = pushed; + updateCFA (); + + freeAsmop (right); + freeAsmop (left); + freeAsmop (result); +} + +/*-----------------------------------------------------------------*/ +/* genXor - code for xor */ +/*-----------------------------------------------------------------*/ +static void +genXor (const iCode *ic) +{ + operand *left, *right, *result; + int size, i, j, omitbyte = -1; + bool result_in_a = false; + bool pushed_a = false; + + D (emit2 ("; genXor", "")); + + aopOp ((left = IC_LEFT (ic)), ic); + aopOp ((right = IC_RIGHT (ic)), ic); + aopOp ((result = IC_RESULT (ic)), ic); + + size = getSize (operandType (result)); + + /* Prefer literal operand on right */ + if (left->aop->type == AOP_LIT || + right->aop->type != AOP_LIT && left->aop->type == AOP_DIR || + (aopInReg (right->aop, 0, A_IDX) || aopInReg (right->aop, 0, X_IDX) || aopInReg (right->aop, 0, Y_IDX)) && left->aop->type == AOP_STK) + { + operand *temp = left; + left = right; + right = temp; + } + + // todo: Use bit complement instructions where it is faster.
+ if (!regDead (A_IDX, ic)) + { + push (ASMOP_A, 0, 1); + pushed_a = true; + } + + // Byte in a needs to be handled first. + for (i = 0; i < size; i++) + if (aopInReg (left->aop, i, A_IDX) || aopInReg (right->aop, i, A_IDX)) + { + const asmop *other_stacked = 0; + int other_offset; + asmop *other = (aopInReg (left->aop, i, A_IDX) ? right : left)->aop; + + other_stacked = stack_aop (other, i, &other_offset); + + if (aopIsLitVal (right->aop, i, 1, 0)) + ; + else if (aopIsLitVal (right->aop, i, 1, 0xff)) + emit3 (A_CPL, ASMOP_A, 0); + else if (!other_stacked) + emit3_o (A_XOR, ASMOP_A, 0, other, i); + else + { + emit2 ("xor", "a, (%d, sp)", other_offset); + cost (2, 1); + } + omitbyte = i; + + if (other_stacked) + pop (other_stacked, 0, 2); + + if (aopInReg (result->aop, i, A_IDX) && size > 1) + result_in_a = true; + else + { + // Avoid overwriting operand. + if (aopRS (result->aop) && !aopOnStack (result->aop, i, 1)) + for (j = 0; j < size; j++) + { + if (i == j) + continue; + if (j < left->aop->size && aopRS (left->aop) && !aopOnStack (left->aop, j, 1) && + left->aop->aopu.bytes[j].byteu.reg->rIdx == result->aop->aopu.bytes[i].byteu.reg->rIdx || + j < right->aop->size && aopRS (right->aop) && !aopOnStack (right->aop, j, 1) && + right->aop->aopu.bytes[j].byteu.reg->rIdx == result->aop->aopu.bytes[i].byteu.reg->rIdx) + { + if (!regalloc_dry_run) + wassertl (0, "Unimplemented xor operand."); + cost (180, 180); + } + } + + cheapMove (result->aop, i, ASMOP_A, 0, false); + } + break; + } + + for (i = 0; i < size; i++) + { + const asmop *right_stacked = 0; + int right_offset; + + if (omitbyte == i) + continue; + + if (aopIsLitVal (right->aop, i, 1, 0)) + { + cheapMove (result->aop, i, left->aop, i, result_in_a); + if (aopInReg (result->aop, i, A_IDX)) + result_in_a = true; + continue; + } + + if (left->aop->type == AOP_DIR && aopSame (left->aop, i, result->aop, i, 1) && + right->aop->type == AOP_LIT && isLiteralBit (byteOfVal (right->aop->aopu.aop_lit, i)) >= 0) + { + 
emit2 ("bcpl", "%s, #%d", aopGet (left->aop, i), isLiteralBit (byteOfVal (right->aop->aopu.aop_lit, i))); + continue; + } + + right_stacked = stack_aop (right->aop, i, &right_offset); + + if (result_in_a) + { + push (ASMOP_A, 0, 1); + pushed_a = true; + result_in_a = false; + } + + cheapMove (ASMOP_A, 0, left->aop, i, false); + + if (aopIsLitVal (right->aop, i, 1, 0xff)) + emit3 (A_CPL, ASMOP_A, 0); + else if (!right_stacked && !(i && aopInReg (right->aop, i, A_IDX))) + emit3_o (A_XOR, ASMOP_A, 0, right->aop, i); + else + { + emit2 ("xor", "a, (%d, sp)", right_offset); + cost (2, 1); + } + + if (right_stacked) + pop (right_stacked, 0, 2); + + if (!aopInReg (result->aop, i, A_IDX)) + cheapMove (result->aop, i, ASMOP_A, 0, false); + else + result_in_a = true; + } + + if (pushed_a) + pop (ASMOP_A, 0, 1); + + freeAsmop (left); + freeAsmop (right); + freeAsmop (result); +} + +/*-----------------------------------------------------------------*/ +/* genOr - code for or */ +/*-----------------------------------------------------------------*/ +static void +genOr (const iCode *ic) +{ + operand *left, *right, *result; + int size, i, j, omitbyte = -1; + bool result_in_a = FALSE; + bool pushed_a = FALSE; + + D (emit2 ("; genOr", "")); + + aopOp ((left = IC_LEFT (ic)), ic); + aopOp ((right = IC_RIGHT (ic)), ic); + aopOp ((result = IC_RESULT (ic)), ic); + + size = getSize (operandType (result)); + + /* Prefer literal operand on right */ + if (left->aop->type == AOP_LIT || + right->aop->type != AOP_LIT && left->aop->type == AOP_DIR || + (aopInReg (right->aop, 0, A_IDX) || aopInReg (right->aop, 0, X_IDX) || aopInReg (right->aop, 0, Y_IDX)) && left->aop->type == AOP_STK) + { + operand *temp = left; + left = right; + right = temp; + } + + // todo: Use bit set instructions where it is faster. + if (!regDead (A_IDX, ic)) + { + push (ASMOP_A, 0, 1); + pushed_a = TRUE; + } + + // Byte in a needs to be handled first. 
+ for (i = 0; i < size; i++) + if (aopInReg (left->aop, i, A_IDX) || aopInReg (right->aop, i, A_IDX)) + { + const asmop *other_stacked = NULL; + int other_offset; + asmop *other = (aopInReg (left->aop, i, A_IDX) ? right : left)->aop; + + other_stacked = stack_aop (other, i, &other_offset); + + if (aopIsLitVal (right->aop, i, 1, 0) || aopInReg (other, i, A_IDX)) + ; + else if (!other_stacked) + emit3_o (A_OR, ASMOP_A, 0, other, i); + else + { + emit2 ("or", "a, (%d, sp)", other_offset); + cost (2, 1); + } + omitbyte = i; + + if (other_stacked) + pop (other_stacked, 0, 2); + + if (aopInReg (result->aop, i, A_IDX) && size > 1) + result_in_a = TRUE; + else + { + // Avoid overwriting operand. + if (aopRS (result->aop) && !aopOnStack (result->aop, i, 1)) + for (j = 0; j < size; j++) + { + if (i == j) + continue; + if (j < left->aop->size && aopRS (left->aop) && !aopOnStack (left->aop, j, 1) && + left->aop->aopu.bytes[j].byteu.reg->rIdx == result->aop->aopu.bytes[i].byteu.reg->rIdx || + j < right->aop->size && aopRS (right->aop) && !aopOnStack (right->aop, j, 1) && + right->aop->aopu.bytes[j].byteu.reg->rIdx == result->aop->aopu.bytes[i].byteu.reg->rIdx) + { + if (!regalloc_dry_run) + wassertl (0, "Unimplemented or operand."); + cost (180, 180); + } + } + + cheapMove (result->aop, i, ASMOP_A, 0, FALSE); + } + + break; + } + + for (i = 0; i < size;) + { + const asmop *right_stacked = NULL; + int right_offset; + + if (omitbyte == i) + i++; + else if (aopIsLitVal (right->aop, i, 2, 0x8000) && aopInReg (result->aop, i, X_IDX) && (aopOnStack (left->aop, i, 2) || left->aop->type == AOP_IMMD)) + { + genMove_o (ASMOP_X, 0, left->aop, i, 2, TRUE, TRUE, regFree (Y_IDX, ic)); + emit3w (A_SLLW, ASMOP_X, 0); + emit2 ("scf", ""); + cost (1, 1); + emit3w (A_RRCW, ASMOP_X, 0); + i += 2; + } + else if (aopIsLitVal (right->aop, i, 2, 0x0001) && aopInReg (result->aop, i, X_IDX) && (aopOnStack (left->aop, i, 2) || left->aop->type == AOP_IMMD)) + { + genMove_o (ASMOP_X, 0, left->aop, i, 2, 
TRUE, TRUE, regFree (Y_IDX, ic)); + emit3w (A_SRLW, ASMOP_X, 0); + emit2 ("scf", ""); + cost (1, 1); + emit3w (A_RLCW, ASMOP_X, 0); + i += 2; + } + else if (aopIsLitVal (right->aop, i, 1, 0x80) && (aopInReg (result->aop, i, XH_IDX) && aopInReg (left->aop, i, XH_IDX) || aopInReg (result->aop, i, YH_IDX) && aopInReg (left->aop, i, YH_IDX))) + { + emit3w (A_SLLW, aopInReg (result->aop, i, XH_IDX) ? ASMOP_X : ASMOP_Y, 0); + emit2 ("scf", ""); + cost (1, 1); + emit3w (A_RRCW, aopInReg (result->aop, i, XH_IDX) ? ASMOP_X : ASMOP_Y, 0); + i++; + } + else if (aopIsLitVal (right->aop, i, 1, 0x01) && (aopInReg (result->aop, i, XL_IDX) && aopInReg (left->aop, i, XL_IDX) || aopInReg (result->aop, i, YL_IDX) && aopInReg (left->aop, i, YL_IDX))) + { + emit3w (A_SRLW, aopInReg (result->aop, i, XL_IDX) ? ASMOP_X : ASMOP_Y, 0); + emit2 ("scf", ""); + cost (1, 1); + emit3w (A_RLCW, aopInReg (result->aop, i, XL_IDX) ? ASMOP_X : ASMOP_Y, 0); + i++; + } + else if (aopIsLitVal (right->aop, i, 1, 0x00)) // If long sequences of 0x00 are common, we should use genMove_o instead. 
+ { + cheapMove (result->aop, i, left->aop, i, result_in_a && !pushed_a); + if (aopInReg (result->aop, i, A_IDX) && i != size - 1) + result_in_a = TRUE; + i++; + } + else if (aopOnStack (left->aop, i, 1) && aopOnStack (result->aop, i, 1) && result->aop->aopu.bytes[i].byteu.stk == left->aop->aopu.bytes[i].byteu.stk && aopIsLitVal (right->aop, i, 1, 0x80)) + { + emit3_o (A_SLL, left->aop, i, 0, 0); + emit2 ("scf", ""); + cost (1, 1); + emit3_o (A_RRC, left->aop, i, 0, 0); + i++; + } + else if (aopOnStack (left->aop, i, 1) && aopOnStack (result->aop, i, 1) && result->aop->aopu.bytes[i].byteu.stk == left->aop->aopu.bytes[i].byteu.stk && aopIsLitVal (right->aop, i, 1, 0x01)) + { + emit3_o (A_SRL, left->aop, i, 0, 0); + emit2 ("scf", ""); + cost (1, 1); + emit3_o (A_RLC, left->aop, i, 0, 0); + i++; + } + else if (left->aop->type == AOP_DIR && aopSame (left->aop, i, result->aop, i, 1) && + right->aop->type == AOP_LIT && isLiteralBit (byteOfVal (right->aop->aopu.aop_lit, i)) >= 0) + { + emit2 ("bset", "%s, #%d", aopGet (left->aop, i), isLiteralBit (byteOfVal (right->aop->aopu.aop_lit, i))); + i++; + } + else + { + if (result_in_a) + { + push (ASMOP_A, 0, 1); + pushed_a = true; + result_in_a = false; + } + + right_stacked = stack_aop (right->aop, i, &right_offset); + + cheapMove (ASMOP_A, 0, left->aop, i, FALSE); + + if (!right_stacked && !(i && aopInReg (right->aop, i, A_IDX))) + emit3_o (A_OR, ASMOP_A, 0, right->aop, i); + else + { + emit2 ("or", "a, (%d, sp)", right_offset); + cost (2, 1); + } + + if (right_stacked) + pop (right_stacked, 0, 2); + + if (!aopInReg (result->aop, i, A_IDX) || i == size - 1) + cheapMove (result->aop, i, ASMOP_A, 0, FALSE); + else + result_in_a = TRUE; + i++; + } + } + + if (pushed_a) + pop (ASMOP_A, 0, 1); + + freeAsmop (left); + freeAsmop (right); + freeAsmop (result); +} + +/*-----------------------------------------------------------------*/ +/* genAnd - code for and */ +/*-----------------------------------------------------------------*/ 
+static void +genAnd (const iCode *ic, iCode *ifx) +{ + operand *left, *right, *result; + int size, i, j, omitbyte = -1; + bool pushed_a = FALSE; + bool result_in_a = FALSE; + + D (emit2 ("; genAnd", "")); + + aopOp ((left = IC_LEFT (ic)), ic); + aopOp ((right = IC_RIGHT (ic)), ic); + aopOp ((result = IC_RESULT (ic)), ic); + + size = getSize (operandType (result)); + + /* Prefer literal operand on right */ + if (left->aop->type == AOP_LIT || + right->aop->type != AOP_LIT && left->aop->type == AOP_DIR || + (aopInReg (right->aop, 0, A_IDX) || aopInReg (right->aop, 0, X_IDX) || aopInReg (right->aop, 0, Y_IDX)) && left->aop->type == AOP_STK) + { + operand *temp = left; + left = right; + right = temp; + } + + if (ifx && result->aop->type == AOP_CND) // TODO: Use sll for 0x7f, srl for 0xfe, swap for 0x08, sll for 0x40. Allow non-literal (and enable in ralloc2.cc) + { + int nonzero; + symbol *tlbl = regalloc_dry_run ? 0 : newiTempLabel (NULL); + + wassertl (right->aop->type == AOP_LIT, "Code generation for bitwise and can only jump on literal operands"); + + // Find the non-zero byte. + for (j = 0, nonzero = 0, i = -1; j < size; j++) + if (byteOfVal (right->aop->aopu.aop_lit, j)) + { + i = j; + nonzero++; + } + + wassertl (nonzero <= 1, "Code generation for bitwise and can handle at most one nonzero byte"); + + if (!nonzero) + goto release; + + // Try to use btjt / btjf. + if (left->aop->type == AOP_DIR && isLiteralBit (ulFromVal (right->aop->aopu.aop_lit)) >= 0) + { + symbol *tlbl = regalloc_dry_run ? 0 : newiTempLabel (NULL); + if (tlbl) + { + emit2 (IC_TRUE (ifx) ? "btjf" : "btjt", "%s, #%d, !tlabel", aopGet (left->aop, i), isLiteralBit (ulFromVal (right->aop->aopu.aop_lit)) - i * 8, labelKey2num (tlbl->key)); + emit2 (options.model == MODEL_LARGE ? "jpf" : "jp", "!tlabel", labelKey2num ((IC_TRUE (ifx) ? IC_TRUE (ifx) : IC_FALSE (ifx))->key)); + emitLabel (tlbl); + } + cost (8, 4); // Hmm. Cost 2 or 3 for btjf? 
+ goto release; + } + + if (byteOfVal (right->aop->aopu.aop_lit, i) == 0x80) + { + if (aopInReg (left->aop, i, XH_IDX)) + emit3w (A_TNZW, ASMOP_X, 0); + else if (aopInReg (left->aop, i, YH_IDX)) + emit3w (A_TNZW, ASMOP_Y, 0); + else if (aopInReg (left->aop, i, A_IDX) || aopOnStack (left->aop, i, 1) || left->aop->type == AOP_DIR) + emit3_o (A_TNZ, left->aop, i, 0, 0); + else + { + if (!regDead (A_IDX, ic)) + push (ASMOP_A, 0, 1); + cheapMove (ASMOP_A, 0, left->aop, i, FALSE); + emit3 (A_TNZ, ASMOP_A, 0); + if (!regDead (A_IDX, ic)) + pop (ASMOP_A, 0, 1); + } + if (!regalloc_dry_run) + emit2 (IC_TRUE (ifx) ? "jrpl" : "jrmi", "!tlabel", labelKey2num (tlbl->key)); + cost (2, 2); // Hmm. Cycle cost overestimate. + } + else if (byteOfVal (right->aop->aopu.aop_lit, i) == 0x01 && + (aopInReg (left->aop, i, XL_IDX) && regDead (X_IDX, ic) || aopInReg (left->aop, i, YL_IDX) && regDead (Y_IDX, ic))) + { + emit3w (A_SRLW, aopInReg (left->aop, i, XL_IDX) ? ASMOP_X : ASMOP_Y, 0); + if (!regalloc_dry_run) + emit2 (IC_TRUE (ifx) ? "jrnc" : "jrc", "!tlabel", labelKey2num (tlbl->key)); + cost (2, 2); // Hmm. Cycle cost overestimate. + } + else if (byteOfVal (right->aop->aopu.aop_lit, i) == 0x01 && + (regDead (A_IDX, ic) || !aopInReg (left->aop, i, A_IDX))) + { + if (!regDead (A_IDX, ic)) + push (ASMOP_A, 0, 1); + cheapMove (ASMOP_A, 0, left->aop, i, FALSE); + emit3 (A_SRL , ASMOP_A, 0); + if (!regDead (A_IDX, ic)) + pop (ASMOP_A, 0, 1); + if (!regalloc_dry_run) + emit2 (IC_TRUE (ifx) ? "jrnc" : "jrc", "!tlabel", labelKey2num (tlbl->key)); + cost (2, 2); // Hmm. Cycle cost overestimate. 
+ } + else if (byteOfVal (right->aop->aopu.aop_lit, i) == 0xff) + { + if (aopInReg (left->aop, i, A_IDX) || aopOnStack (left->aop, i, 1) || left->aop->type == AOP_DIR) + emit3_o (A_TNZ, left->aop, i, 0, 0); + else + { + if (!regDead (A_IDX, ic)) + push (ASMOP_A, 0, 1); + cheapMove (ASMOP_A, 0, left->aop, i, FALSE); + emit3 (A_TNZ, ASMOP_A, 0); + if (!regDead (A_IDX, ic)) + pop (ASMOP_A, 0, 1); + } + if (!regalloc_dry_run) + emit2 (IC_TRUE (ifx) ? "jreq" : "jrne", "!tlabel", labelKey2num (tlbl->key)); + cost (2, 2); // Hmm. Cycle cost overestimate. + } + else + { + if (!regDead (A_IDX, ic) && !aopInReg (left->aop, i, A_IDX)) + push (ASMOP_A, 0, 1); + cheapMove (ASMOP_A, 0, left->aop, i, FALSE); + emit3_o (A_BCP, ASMOP_A, 0, right->aop, i); + if (!regDead (A_IDX, ic) && !aopInReg (left->aop, i, A_IDX)) + pop (ASMOP_A, 0, 1); + if (!regalloc_dry_run) + emit2 (IC_TRUE (ifx) ? "jreq" : "jrne", "!tlabel", labelKey2num (tlbl->key)); + cost (2, 2); // Hmm. Cycle cost overestimate. + } + emitJP(IC_TRUE (ifx) ? IC_TRUE (ifx) : IC_FALSE (ifx), 1.0f); // Hmm. Cycle cost overestimate. + emitLabel (tlbl); + goto release; + } + + // Byte in a needs to be handled first. + for (i = 0; i < size; i++) + if (aopInReg (left->aop, i, A_IDX) || aopInReg (right->aop, i, A_IDX)) + { + const asmop *other_stacked = NULL; + int other_offset; + asmop *other = (aopInReg (left->aop, i, A_IDX) ? right : left)->aop; + + if (!regDead (A_IDX, ic)) + { + push (ASMOP_A, 0, 1); + pushed_a = TRUE; + } + + if (aopInReg (left->aop, i, A_IDX) && aopIsLitVal (right->aop, i, 1, 0x00)) // A is dead, it just doesn't know it yet. 
+ break; + + other_stacked = stack_aop (other, i, &other_offset); + + if (aopIsLitVal (right->aop, i, 1, 0xff)) + ; + else if (!other_stacked) + emit3_o (A_AND, ASMOP_A, 0, other, i); + else + { + emit2 ("and", "a, (%d, sp)", other_offset); + cost (2, 1); + } + omitbyte = i; + + if (other_stacked) + pop (other_stacked, 0, 2); + + if (aopInReg (result->aop, i, A_IDX) && size > 1) + if (pushed_a) + { + if (!regalloc_dry_run) + wassertl (0, "Unimplemented and operand."); + cost (180, 180); + } + else + { + push (ASMOP_A, 0, 1); // todo: Do not push, if other bytes do not affect a (e.g. due to using clr). + pushed_a = TRUE; + } + else + { + // Avoid overwriting operand. + if (aopRS (result->aop) && !aopOnStack (result->aop, i, 1)) + for (j = 0; j < size; j++) + { + if (i == j) + continue; + if (j < left->aop->size && aopRS (left->aop) && !aopOnStack (left->aop, j, 1) && + left->aop->aopu.bytes[j].byteu.reg->rIdx == result->aop->aopu.bytes[i].byteu.reg->rIdx || + j < right->aop->size && aopRS (right->aop) && !aopOnStack (right->aop, j, 1) && + right->aop->aopu.bytes[j].byteu.reg->rIdx == result->aop->aopu.bytes[i].byteu.reg->rIdx) + { + if (!regalloc_dry_run) + wassertl (0, "Unimplemented and operand."); + cost (180, 180); + } + } + + cheapMove (result->aop, i, ASMOP_A, 0, FALSE); + } + + break; + } + + for (i = 0; i < size;) + { + // Cases that don't need a free a. + if (omitbyte == i) + { + i++; + continue; + } + else if (aopIsLitVal (right->aop, i, 1, 0x7f) && (aopInReg (result->aop, i, XH_IDX) && aopInReg (left->aop, i, XH_IDX) || aopInReg (result->aop, i, YH_IDX) && aopInReg (left->aop, i, YH_IDX))) + { + emit3w (A_SLLW, aopInReg (result->aop, i, XH_IDX) ? ASMOP_X : ASMOP_Y, 0); + emit3w (A_SRLW, aopInReg (result->aop, i, XH_IDX) ? 
ASMOP_X : ASMOP_Y, 0); + i++; + continue; + } + else if (aopIsLitVal (right->aop, i, 1, 0xfe) && (aopInReg (result->aop, i, XL_IDX) && aopInReg (left->aop, i, XL_IDX) || aopInReg (result->aop, i, YL_IDX) && aopInReg (left->aop, i, YL_IDX))) + { + emit3w (A_SRLW, aopInReg (result->aop, i, XL_IDX) ? ASMOP_X : ASMOP_Y, 0); + emit3w (A_SLLW, aopInReg (result->aop, i, XL_IDX) ? ASMOP_X : ASMOP_Y, 0); + i++; + continue; + } + else if (aopOnStack (left->aop, i, 1) && aopOnStack (result->aop, i, 1) && result->aop->aopu.bytes[i].byteu.stk == left->aop->aopu.bytes[i].byteu.stk && aopIsLitVal (right->aop, i, 1, 0x7f)) + { + emit3_o (A_SLL, left->aop, i, 0, 0); + emit3_o (A_SRL, left->aop, i, 0, 0); + i++; + continue; + } + else if (aopOnStack (left->aop, i, 1) && aopOnStack (result->aop, i, 1) && result->aop->aopu.bytes[i].byteu.stk == left->aop->aopu.bytes[i].byteu.stk && aopIsLitVal (right->aop, i, 1, 0xfe)) + { + emit3_o (A_SRL, left->aop, i, 0, 0); + emit3_o (A_SLL, left->aop, i, 0, 0); + i++; + continue; + } + else if (aopIsLitVal (right->aop, i, 1, 0xff) && aopSame (left->aop, i, result->aop, i, 1)) + { + i++; + continue; + } + else if (aopIsLitVal (right->aop, i, 2, 0x7fff) && aopInReg (result->aop, i, X_IDX) && (aopInReg (left->aop, i, X_IDX) || aopOnStack (left->aop, i, 2) || left->aop->type == AOP_IMMD)) + { + genMove_o (ASMOP_X, 0, left->aop, i, 2, pushed_a || (regDead (A_IDX, ic) && !result_in_a), TRUE, regFree (Y_IDX, ic)); + emit3w (A_SLLW, ASMOP_X, 0); + emit3w (A_SRLW, ASMOP_X, 0); + i += 2; + continue; + } + else if (aopIsLitVal (right->aop, i, 2, 0xffffe) && aopInReg (result->aop, i, X_IDX) && (aopInReg (left->aop, i, X_IDX) || aopOnStack (left->aop, i, 2) || left->aop->type == AOP_IMMD)) + { + genMove_o (ASMOP_X, 0, left->aop, i, 2, pushed_a || (regDead (A_IDX, ic) && !result_in_a), TRUE, regFree (Y_IDX, ic)); + emit3w (A_SRLW, ASMOP_X, 0); + emit3w (A_SLLW, ASMOP_X, 0); + i += 2; + continue; + } + else if (left->aop->type == AOP_DIR && aopSame (left->aop, 
i, result->aop, i, 1) && + right->aop->type == AOP_LIT && isLiteralBit (~byteOfVal (right->aop->aopu.aop_lit, i) & 0xff) >= 0) + { + emit2 ("bres", "%s, #%d", aopGet (left->aop, i), isLiteralBit (~byteOfVal (right->aop->aopu.aop_lit, i) & 0xff)); + i++; + continue; + } + + // Cases that want a free a. + if (!pushed_a && !(regDead (A_IDX, ic) && !result_in_a)) + { + push (ASMOP_A, 0, 1); + pushed_a = TRUE; + } + + if (aopIsLitVal (right->aop, i, 1, 0x00)) + { + bool new_in_a = FALSE; + for(j = i; j < size && j != omitbyte && aopIsLitVal (right->aop, j, 1, 0x00); j++) + if (aopInReg (result->aop, j, A_IDX)) + new_in_a = TRUE; + genMove_o (result->aop, i, ASMOP_ZERO, 0, j - i, TRUE, regFree (X_IDX, ic), regFree (Y_IDX, ic)); + result_in_a |= new_in_a; + i = j; + } + else if (aopIsLitVal (right->aop, i, 1, 0xff)) + { + bool new_in_a = FALSE; + for(j = i; j < size && j != omitbyte && aopIsLitVal (right->aop, j, 1, 0xff); j++) + if (aopInReg (result->aop, j, A_IDX)) + new_in_a = TRUE; + genMove_o (result->aop, i, left->aop, i, j - i, TRUE, regFree (X_IDX, ic), regFree (Y_IDX, ic)); + result_in_a |= new_in_a; + i = j; + } + else + { + const asmop *right_stacked = NULL; + int right_offset; + + wassert (pushed_a || regDead (A_IDX, ic) && !result_in_a); + + right_stacked = stack_aop (right->aop, i, &right_offset); + + cheapMove (ASMOP_A, 0, left->aop, i, FALSE); + + if (!right_stacked && !(i && aopInReg (right->aop, i, A_IDX))) + emit3_o (A_AND, ASMOP_A, 0, right->aop, i); + else + { + emit2 ("and", "a, (%d, sp)", right_offset); + cost (2, 1); + } + + if (right_stacked) + pop (right_stacked, 0, 2); + + cheapMove (result->aop, i, ASMOP_A, 0, FALSE); + + if (aopInReg (result->aop, i, A_IDX)) + result_in_a = TRUE; + + i++; + } + } + + if (pushed_a) + pop (ASMOP_A, 0, 1); + +release: + freeAsmop (left); + freeAsmop (right); + freeAsmop (result); +} + +/*------------------------------------------------------------------*/ +/* init_shiftop - find a good place to shift in */ 
/*------------------------------------------------------------------*/
/*
 * Fills *shiftop with a per-byte location (register or stack) in which a
 * value of result->size bytes can be shifted.
 *
 * shiftop            - output; size, regs[] and aopu.bytes[] are written,
 *                      type becomes AOP_REG if every byte is in a register,
 *                      AOP_REGSTK otherwise.
 * result, left       - result and left asmops of the shift.
 * right              - shift-count asmop; its literal value is read when
 *                      its type is AOP_LIT, otherwise the count is taken as 0.
 * ic                 - the shift iCode (used for regDead queries).
 * a_needed_for_count - when true, the branches that would newly pick
 *                      register a are disabled.
 *
 * Preference order per byte (see the if-chain): stay in a dead a / x / y
 * that already holds left, borrow a free x or y for stack operands, borrow a
 * for a 1-byte stack operand, otherwise use the result's own location.
 */
static void init_shiftop(asmop *shiftop, const asmop *result, const asmop *left, const asmop *right, const iCode *ic, bool a_needed_for_count)
{
  int i;
  const int size = result->size;
  unsigned int shCount = right->type == AOP_LIT ? ulFromVal (right->aopu.aop_lit) : 0;
  bool all_in_reg = TRUE;

  shiftop->size = size;
  /* -1 marks a register as not holding any byte of shiftop. */
  shiftop->regs[A_IDX] = -1;
  shiftop->regs[XL_IDX] = -1;
  shiftop->regs[XH_IDX] = -1;
  shiftop->regs[YL_IDX] = -1;
  shiftop->regs[YH_IDX] = -1;

  for (i = 0; i < size;)
    {
      /* NOTE(review): aopOnStack is queried at offset 0 here while the stack
         slots compared are those of byte i - confirm this is intended. */
      bool same_2_stack = aopOnStack (left, 0, 2) && aopOnStack (result, 0, 2) && left->aopu.bytes[i].byteu.stk == result->aopu.bytes[i].byteu.stk;
      bool same_1_stack = aopOnStack (left, 0, 1) && aopOnStack (result, 0, 1) && left->aopu.bytes[i].byteu.stk == result->aopu.bytes[i].byteu.stk;

      if (!a_needed_for_count && aopInReg (left, i, A_IDX) && regDead (A_IDX, ic) && result->regs[A_IDX] == -1 && (size <= 1 || shCount >= 2))
        {
          shiftop->aopu.bytes[i] = left->aopu.bytes[i];
          shiftop->regs[A_IDX] = i;
          i++;
        }
      else if (aopInReg (left, i, X_IDX) && regDead (X_IDX, ic) && result->regs[XL_IDX] == -1 && result->regs[XH_IDX] == -1 && right->regs[XL_IDX] == -1 && right->regs[XH_IDX] == -1)
        {
          shiftop->aopu.bytes[i] = left->aopu.bytes[i];
          shiftop->aopu.bytes[i + 1] = left->aopu.bytes[i + 1];
          shiftop->regs[XL_IDX] = i;
          shiftop->regs[XH_IDX] = i + 1;
          i += 2;
        }
      else if (aopInReg (left, i, Y_IDX) && regDead (Y_IDX, ic) && result->regs[YL_IDX] == -1 && result->regs[YH_IDX] == -1 && !aopInReg (result, i, X_IDX))
        {
          shiftop->aopu.bytes[i] = left->aopu.bytes[i];
          shiftop->aopu.bytes[i + 1] = left->aopu.bytes[i + 1];
          shiftop->regs[YL_IDX] = i;
          shiftop->regs[YH_IDX] = i + 1;
          i += 2;
        }
      // Try to shift in x instead of on stack.
      else if ((aopOnStack (left, i, 2) || left->type == AOP_LIT) && aopOnStack (result, i, 2) && !same_2_stack && regDead (X_IDX, ic) &&
        shiftop->regs[XL_IDX] == -1 && shiftop->regs[XH_IDX] == -1 &&
        left->regs[XL_IDX] == -1 && left->regs[XH_IDX] == -1 && result->regs[XL_IDX] == -1 && result->regs[XH_IDX] == -1 && right->regs[XL_IDX] == -1 && right->regs[XH_IDX] == -1)
        {
          shiftop->aopu.bytes[i] = ASMOP_X->aopu.bytes[0];
          shiftop->aopu.bytes[i + 1] = ASMOP_X->aopu.bytes[1];
          shiftop->regs[XL_IDX] = i;
          shiftop->regs[XH_IDX] = i + 1;
          i += 2;
        }
      // Try to shift in y instead of on stack.
      else if (size == 2 && (aopOnStack (left, i, 2) || left->type == AOP_LIT) && aopOnStack (result, i, 2) && !same_2_stack && regDead (Y_IDX, ic) &&
        shiftop->regs[YL_IDX] == -1 && shiftop->regs[YH_IDX] == -1 &&
        left->regs[YL_IDX] == -1 && left->regs[YH_IDX] == -1 && result->regs[YL_IDX] == -1 && result->regs[YH_IDX] == -1)
        {
          shiftop->aopu.bytes[i] = ASMOP_Y->aopu.bytes[0];
          shiftop->aopu.bytes[i + 1] = ASMOP_Y->aopu.bytes[1];
          shiftop->regs[YL_IDX] = i;
          shiftop->regs[YH_IDX] = i + 1;
          i += 2;
        }
      else if (!a_needed_for_count && size == 1 && aopOnStack (left, i, 1) && aopOnStack (result, i, 1) && !same_1_stack && regDead (A_IDX, ic) && shiftop->regs[A_IDX] == -1 && result->regs[A_IDX] == -1 && left->regs[A_IDX] == -1) // TODO: More cases.
        {
          shiftop->aopu.bytes[i] = ASMOP_A->aopu.bytes[0];
          shiftop->regs[A_IDX] = i;
          i++;
        }
      else
        {
          /* Fallback: shift the byte where the result lives. */
          shiftop->aopu.bytes[i] = result->aopu.bytes[i];
          if (result->aopu.bytes[i].in_reg)
            shiftop->regs[result->aopu.bytes[i].byteu.reg->rIdx] = i;
          i++;
        }
    }

  for (i = 0; i < size; i++)
    if (!shiftop->aopu.bytes[i].in_reg)
      all_in_reg = FALSE;
  shiftop->type = all_in_reg ? AOP_REG : AOP_REGSTK;
}

/*------------------------------------------------------------------*/
/* genLeftShiftLiteral - left shifting by known count for size <= 2 */
/*------------------------------------------------------------------*/
/*
 * left, right, result - operands of the shift; right->aop must already be a
 *                       literal asmop (its aop_lit is read on entry, before
 *                       any aopOp call made here).
 * ic                  - the shift iCode.
 *
 * The count is clamped to the result width; a count of the full width
 * stores zero. For 1-byte operands held in registers, three strategies
 * (repeated shift, swap+mask, multiply) are compared by estimated bytes and
 * cycles before one is chosen.
 */
static void
genLeftShiftLiteral (operand *left, operand *right, operand *result, const iCode *ic)
{
  unsigned int shCount = ulFromVal (right->aop->aopu.aop_lit);
  unsigned int size, i;

  struct asmop shiftop_impl;
  struct asmop *shiftop;

  D (emit2 ("; genLeftShiftLiteral", ""));

  size = getSize (operandType (result));

  aopOp (left, ic);
  aopOp (result, ic);

  if (shCount > (size * 8))
    shCount = size * 8;

  if (shCount >= (size * 8))
    {
      genMove(result->aop, ASMOP_ZERO, regDead (A_IDX, ic), regDead (X_IDX, ic), regDead (Y_IDX, ic));
      goto release;
    }

  wassertl (size <= 2 || shCount % 8 <= 1, "Shifting of longs and long longs by non-trivial values should be handled by generic function.");

  if (shCount < 8 && aopRS (left->aop) && aopRS (result->aop))
    {
      shiftop = &shiftop_impl;
      init_shiftop (shiftop, result->aop, left->aop, right->aop, ic, FALSE);
      genMove (shiftop, left->aop, regDead (A_IDX, ic), regDead (X_IDX, ic), regDead (Y_IDX, ic));
    }
  else if (size == 2 && shCount >= 8 && regDead (A_IDX, ic) && (aopInReg (left->aop, 0, XL_IDX) && aopInReg (result->aop, 0, X_IDX) || aopInReg (left->aop, 0, YL_IDX) && aopInReg (result->aop, 0, Y_IDX)))
    {
      /* Shift by 8 via a single rlwa with a cleared a. */
      shiftop = result->aop;
      emit3 (A_CLR, ASMOP_A, 0);
      emit3w (A_RLWA, shiftop, 0);
      shCount -= 8;
    }
  else
    {
      /* Whole-byte part of the shift is done by moving bytes up and zero-filling below. */
      shiftop = result->aop;
      genMove_o (shiftop, shCount / 8, left->aop, 0, size - shCount / 8, regDead (A_IDX, ic), regDead (X_IDX, ic), regDead (Y_IDX, ic));
      genMove_o (shiftop, 0, ASMOP_ZERO, 0, shCount / 8, regDead (A_IDX, ic) && shiftop->regs[A_IDX] < 0, regDead (X_IDX, ic) && shiftop->regs[XL_IDX] < 0 && shiftop->regs[XH_IDX] < 0, regDead (Y_IDX, ic) && shiftop->regs[YL_IDX] < 0 && shiftop->regs[YH_IDX] < 0);
      shCount %= 8;
    }

  if (size == 1 && aopRS (shiftop) && shCount)
    {
      /* Estimated code size / cycle count of each candidate strategy. */
      int std_bytes, swap_bytes = 0, mul_bytes = 0;
      int std_cycles, swap_cycles = 0, mul_cycles = 0;
      bool swap_possible = FALSE;
      bool mul_possible = FALSE;

      if (aopInReg (shiftop, 0, A_IDX) || aopInReg (shiftop, 0, XL_IDX) && regDead (XH_IDX, ic))
        {
          std_bytes = shCount;
          std_cycles = shCount;
        }
      else if (aopOnStack (shiftop, 0, 1))
        {
          std_bytes = shCount * 2;
          std_cycles = shCount;
        }
      else if (aopInReg (shiftop, 0, YH_IDX))
        {
          std_bytes = shCount * 5;
          std_cycles = shCount * 3;
        }
      else
        {
          std_bytes = shCount * 3;
          std_cycles = shCount * 3;
        }

      if (!aopOnStack (shiftop, 0, 1))
        {
          swap_bytes = shCount + !aopInReg (shiftop, 0, A_IDX) * 2 + aopInReg (shiftop, 0, YH_IDX) * 2;
          swap_cycles = shCount + !aopInReg (shiftop, 0, A_IDX) * 2;
          if (shCount >= 4)
            {
              swap_bytes -= 1;
              swap_cycles -= 2;
            }
          swap_possible = TRUE;
        }

      if (aopInReg (shiftop, 0, XL_IDX) && regDead (XH_IDX, ic) || aopInReg (shiftop, 0, YL_IDX) && regDead (YH_IDX, ic))
        {
          mul_bytes = 3 + aopInReg (shiftop, 0, YL_IDX) + !regDead (A_IDX, ic) * 2;
          mul_cycles = 2 + !regDead (A_IDX, ic) * 2;
          mul_possible = TRUE;
        }

      if (swap_possible && (swap_bytes <= std_bytes && swap_cycles <= std_cycles || swap_bytes < std_bytes && optimize.codeSize || swap_cycles < std_cycles && optimize.codeSpeed)) // swap better than std
        {
          if (mul_possible && (mul_bytes <= swap_bytes && mul_cycles <= swap_cycles || mul_bytes < swap_bytes && optimize.codeSize || mul_cycles < swap_cycles && optimize.codeSpeed)) // mul better than swap
            goto mul;
          goto swap;
        }
      if (mul_possible && (mul_bytes <= std_bytes && mul_cycles <= std_cycles || mul_bytes < std_bytes && optimize.codeSize || mul_cycles < std_cycles && optimize.codeSpeed)) // mul better than std
        goto mul;
      goto std;

swap:
      swap_to_a (shiftop->aopu.bytes[0].byteu.reg->rIdx);
      if (shCount >= 4)
        {
          /* swap a + mask == shift left by 4. */
          emit3 (A_SWAP, ASMOP_A, 0);
          emit2 ("and", "a, #0xf0");
          cost (2, 1);
          shCount -= 4;
        }
      while (shCount--)
        emit3 (A_SLL, ASMOP_A, 0);
      swap_from_a (shiftop->aopu.bytes[0].byteu.reg->rIdx);
      genMove (result->aop, shiftop, regDead (A_IDX, ic), regDead (X_IDX, ic), regDead (Y_IDX, ic));
      goto release;

mul:
      if (!regDead (A_IDX, ic))
        push (ASMOP_A, 0, 1);
      emit2 ("ld", "a, #0x%02x", 1 << shCount);
      cost (2, 1);
      emit2 ("mul", aopInReg (shiftop, 0, YL_IDX) ? "y, a" : "x, a");
      /* NOTE(review): the arguments look swapped compared with the
         cost (1 + in_y, 17) used for div in genRightShiftLiteral - confirm
         the parameter order of cost (). */
      cost (4, 1 + aopInReg (shiftop, 0, YL_IDX));
      if (!regDead (A_IDX, ic))
        pop (ASMOP_A, 0, 1);
      genMove (result->aop, shiftop, regDead (A_IDX, ic), regDead (X_IDX, ic), regDead (Y_IDX, ic));
      goto release;
    }
std:

  /* Generic path: one shift/rotate-through-carry pass over all bytes per count. */
  while (shCount--)
    for (i = 0; i < size;)
      {
        if (aopInReg (shiftop, i, X_IDX) || aopInReg (shiftop, i, Y_IDX))
          {
            emit3w_o (i ? A_RLCW : A_SLLW, shiftop, i, 0, 0);
            i += 2;
          }
        else if (i == size - 1 && aopInReg (shiftop, i, XL_IDX) && regDead (XH_IDX, ic) && shiftop->regs[XH_IDX] < 0)
          {
            emit3w_o (i ? A_RLCW : A_SLLW, ASMOP_X, 0, 0, 0);
            i++;
          }
        else
          {
            int swapidx = -1;
            if (aopRS (shiftop) && !aopInReg (shiftop, i, A_IDX) && shiftop->aopu.bytes[i].in_reg)
              swapidx = shiftop->aopu.bytes[i].byteu.reg->rIdx;

            if (swapidx == -1)
              emit3_o (i ? A_RLC : A_SLL, shiftop, i, 0, 0);
            else
              {
                swap_to_a (swapidx);
                emit3 (i ? A_RLC : A_SLL, ASMOP_A, 0);
                swap_from_a (swapidx);
              }

            i++;
          }
      }


  genMove (result->aop, shiftop, regDead (A_IDX, ic), regDead (X_IDX, ic), regDead (Y_IDX, ic));

release:
  freeAsmop (left);
  freeAsmop (result);
}

/*-----------------------------------------------------------------*/
/* genLeftShift - generates code for left shifting */
/*-----------------------------------------------------------------*/
/*
 * Literal counts that genLeftShiftLiteral accepts (result size <= 2, count
 * % 8 <= 1, or count >= result width) are delegated to it. Everything else
 * is emitted as a loop that keeps the remaining count in register a.
 */
static void
genLeftShift (const iCode *ic)
{
  operand *left, *right, *result;
  int i, size;
  bool pushed_a = FALSE;
  symbol *tlbl1, *tlbl2;
  unsigned int iterations;
  int skip_bytes = 0;

  struct asmop shiftop_impl;
  struct asmop *shiftop;

  right = IC_RIGHT (ic);
  left = IC_LEFT (ic);
  result = IC_RESULT (ic);

  aopOp (right, ic);

  /* if the shift count is known then do it
     as efficiently as possible */
  if (right->aop->type == AOP_LIT &&
    (getSize (operandType (result)) <= 2 ||
    ulFromVal (right->aop->aopu.aop_lit) % 8 <= 1 ||
    ulFromVal (right->aop->aopu.aop_lit) >= getSize (operandType (result)) * 8 ))
    {
      genLeftShiftLiteral (left, right, result, ic);
      freeAsmop (right);
      return;
    }

  D (emit2 ("; genLeftShift", ""));

  aopOp (result, ic);
  aopOp (left, ic);

  if (!regDead (A_IDX, ic))
    {
      push (ASMOP_A, 0, 1);
      pushed_a = true;
    }

  if (aopRS (left->aop) && aopRS (result->aop))
    {
      shiftop = &shiftop_impl;
      init_shiftop (shiftop, result->aop, left->aop, right->aop, ic, false);
    }
  else
    shiftop = result->aop;

  /* For a non-literal count, 2 is used; iterations later feeds the
     dry-run cycle scale and the loop cost estimate. */
  iterations = (right->aop->type == AOP_LIT ? byteOfVal (right->aop->aopu.aop_lit, 0) : 2);

  if (right->aop->type == AOP_LIT)
    {
      /* Multiples of 16 bits are handled by moving bytes and zero-filling. */
      skip_bytes = iterations / 16 * 2;
      genMove_o (shiftop, skip_bytes, left->aop, 0, shiftop->size - skip_bytes, right->aop->regs[A_IDX] < 0, regDead (X_IDX, ic) && right->aop->regs[XL_IDX] < 0 && right->aop->regs[XH_IDX] < 0, regDead (Y_IDX, ic) && right->aop->regs[YL_IDX] < 0 && right->aop->regs[YH_IDX] < 0);
      genMove_o (shiftop, 0, ASMOP_ZERO, 0, skip_bytes, true, regDead (X_IDX, ic) && shiftop->regs[XL_IDX] < 0 && shiftop->regs[XH_IDX] < 0, regDead (Y_IDX, ic) && shiftop->regs[YL_IDX] < 0 && shiftop->regs[YH_IDX] < 0);
      iterations %= 16;
    }
  else
    genMove (shiftop, left->aop, right->aop->regs[A_IDX] < 0, regDead (X_IDX, ic) && right->aop->regs[XL_IDX] < 0 && right->aop->regs[XH_IDX] < 0, regDead (Y_IDX, ic) && right->aop->regs[YL_IDX] < 0 && right->aop->regs[YH_IDX] < 0);

  size = result->aop->size;

  for (i = 0; i < size; i++)
    {
      if (aopRS (shiftop) && (!aopInReg (shiftop, i, A_IDX) || aopInReg (right->aop, 0, A_IDX)) && shiftop->aopu.bytes[i].in_reg &&
        right->aop->regs[shiftop->aopu.bytes[i].byteu.reg->rIdx] == 0)
        {
          if (!regalloc_dry_run)
            wassertl (0, "Overwriting shift count");
          cost (380, 380);
        }
      /* A byte of the value lives in a: park it on the stack, a is needed for the count. */
      if (aopInReg (shiftop, i, A_IDX) && !pushed_a)
        {
          push (ASMOP_A, 0, 1);
          pushed_a = true;
        }
    }

  tlbl1 = (regalloc_dry_run ? 0 : newiTempLabel (0));
  tlbl2 = (regalloc_dry_run ? 0 : newiTempLabel (0));

  // Get shift count into a.
  if (right->aop->type == AOP_LIT)
    {
      if (!iterations)
        goto postshift;
      emit2 ("ld", "a, #0x%02x", iterations);
      cost (2, 1);
    }
  else
    cheapMove (ASMOP_A, 0, right->aop, 0, false);

  /* Skip the loop entirely when the run-time count is zero. */
  if (right->aop->type != AOP_LIT || aopIsLitVal (right->aop, 0, 1, 0))
    {
      if (!aopOnStack (right->aop, 0, 1) && right->aop->type != AOP_DIR)
        emit3 (A_TNZ, ASMOP_A, 0);
      if (tlbl2)
        emit2 ("jreq", "!tlabel", labelKey2num (tlbl2->key));
      cost (2, 0);
    }

  emitLabel (tlbl1);

  regalloc_dry_run_cycle_scale = iterations;
  for (i = skip_bytes; i < size;)
    {
      int swapidx = -1;

      if (aopInReg (shiftop, i, X_IDX) || aopInReg (shiftop, i, Y_IDX))
        {
          emit3w_o (i - skip_bytes ? A_RLCW : A_SLLW, shiftop, i, 0, 0);
          i += 2;
          continue;
        }

      if (aopInReg (shiftop, i, A_IDX))
        {
          /* The byte that belongs in a was pushed above; shift it in its stack slot. */
          emit2 (i - skip_bytes ? "rlc" : "sll", "(1, sp)");
          cost (2, 1);
          i++;
          continue;
        }

      if (aopRS (shiftop) && !aopInReg (shiftop, i, A_IDX) && shiftop->aopu.bytes[i].in_reg)
        swapidx = shiftop->aopu.bytes[i].byteu.reg->rIdx;

      if (swapidx == -1)
        emit3_o (i - skip_bytes ? A_RLC : A_SLL, shiftop, i, 0, 0);
      else
        {
          swap_to_a (swapidx);
          emit3 (i - skip_bytes ? A_RLC : A_SLL, ASMOP_A, 0);
          swap_from_a (swapidx);
        }
      i++;
    }
  emit3 (A_DEC, ASMOP_A, 0);
  regalloc_dry_run_cycle_scale = 1;

  if (tlbl1)
    emit2 ("jrne", "!tlabel", labelKey2num (tlbl1->key));
  cost (2, (iterations - 1) * 2 + 1);
  emitLabel (tlbl2);

postshift:
  if(!regDead (A_IDX, ic))
    {
      genMove (result->aop, shiftop, regDead (A_IDX, ic) || pushed_a, regDead (X_IDX, ic), regDead (Y_IDX, ic));
      if (pushed_a)
        pop (ASMOP_A, 0, 1);
    }
  else
    {
      if (pushed_a)
        pop (ASMOP_A, 0, 1);
      genMove (result->aop, shiftop, false, regDead (X_IDX, ic), regDead (Y_IDX, ic));
    }

  freeAsmop (left);
  freeAsmop (result);
  freeAsmop (right);
}

/*------------------------------------------------------------------*/
/* genGetABit - get a bit */
/*------------------------------------------------------------------*/
/*
 * The bit index is read from the right operand's literal value.
 * With ifx and an AOP_CND result, only the top bit of a byte is supported
 * (asserted) and a sign test plus branch is emitted. Otherwise the selected
 * bit is moved into bit 0 of a (other bits cleared) and stored to the result.
 */
static void
genGetABit (const iCode *ic, iCode *ifx)
{
  operand *left, *right, *result;
  int shCount, leftcost, rightcost;

  D (emit2 ("; genGetABit", ""));

  right = IC_RIGHT (ic);
  left = IC_LEFT (ic);
  result = IC_RESULT (ic);

  aopOp (right, ic);
  aopOp (left, ic);
  aopOp (result, ic);

  shCount = (int) ulFromVal ((right->aop)->aopu.aop_lit);

  if (ifx && result->aop->type == AOP_CND)
    {
      wassert (shCount % 8 == 7);

      symbol *tlbl = regalloc_dry_run ? 0 : newiTempLabel (NULL);

      if (aopInReg (left->aop, shCount / 8, XH_IDX))
        emit3w (A_TNZW, ASMOP_X, 0);
      else if (aopInReg (left->aop, shCount / 8, YH_IDX))
        emit3w (A_TNZW, ASMOP_Y, 0);
      else if (aopInReg (left->aop, shCount / 8, XL_IDX) || aopInReg (left->aop, shCount / 8, YL_IDX))
        {
          /* Not implemented: only tolerated (at high cost) during a dry run. */
          wassert (regalloc_dry_run);
          cost (200, 200);
        }
      else
        emit3_o (A_TNZ, left->aop, shCount / 8, 0, 0);

      if (!regalloc_dry_run)
        emit2 (IC_TRUE (ifx) ? "jrpl" : "jrmi", "!tlabel", labelKey2num (tlbl->key));
      cost (2, 2); // Hmm. Cycle cost overestimate.

      emitJP (IC_TRUE (ifx) ? IC_TRUE (ifx) : IC_FALSE (ifx), 1.0f); // Hmm. Cycle cost overestimate.
      emitLabel (tlbl);

      goto release;
    }

  if (!regDead (A_IDX, ic))
    push (ASMOP_A, 0, 1);

  if ((shCount % 8) == 7 &&
    (aopInReg (left->aop, shCount / 8, XH_IDX) && regDead (X_IDX, ic) || aopInReg (left->aop, shCount / 8, YH_IDX) && regDead (Y_IDX, ic)))
    {
      /* Top bit of a dead 16-bit register: shift it straight into carry. */
      bool x = aopInReg (left->aop, shCount / 8, XH_IDX);
      emit3w (A_SLLW, x ? ASMOP_X : ASMOP_Y, 0);
      goto write_to_a;
    }

  genMove_o (ASMOP_A, 0, left->aop, shCount / 8, 1, TRUE, regDead (X_IDX, ic), regDead (Y_IDX, ic));
  shCount %= 8;

  /* Compare shifting the bit down to bit 0 against shifting it up into carry. */
  rightcost = 2 + (shCount > 4) + shCount % 4;
  leftcost = 3 + (shCount <= 4) + (7 - shCount) % 4;

  if (rightcost < leftcost)
    {
      if (shCount > 4)
        {
          emit3 (A_SWAP, ASMOP_A, 0);
          shCount -= 4;
        }
      while (shCount--)
        emit3 (A_SRL, ASMOP_A, 0);
      emit3 (A_AND, ASMOP_A, ASMOP_ONE);
      cost (2, 1);
    }
  else
    {
      if (shCount <= 4)
        {
          emit3 (A_SWAP, ASMOP_A, 0);
          shCount += 4;
        }
      while (shCount++ < 8)
        emit3 (A_SLL, ASMOP_A, 0);
write_to_a:
      /* The wanted bit is expected in carry here; materialise it as 0/1 in a. */
      emit3 (A_CLR, ASMOP_A, 0);
      emit3 (A_RLC, ASMOP_A, 0);
    }

  genMove (result->aop, ASMOP_A, TRUE, regDead (X_IDX, ic), regDead (Y_IDX, ic));

  if (!regDead (A_IDX, ic))
    pop (ASMOP_A, 0, 1);

release:
  freeAsmop (right);
  freeAsmop (left);
  freeAsmop (result);
}

/*------------------------------------------------------------------*/
/* genRightShiftLiteral - right shifting by known count */
/*------------------------------------------------------------------*/
/*
 * left, right, result - operands of the shift; right->aop must already be a
 *                       literal asmop (its aop_lit is read on entry).
 * ic                  - the shift iCode.
 *
 * Signedness is taken from the type of left. After the operand is moved
 * into a shift location, several shortcuts are tried in order (swap a, div,
 * divw, sign-extension via rrwa, shift-by-8-then-left) before falling back
 * to a bit-by-bit shift loop. Every path ends at release, which copies
 * shiftop to the result.
 */
static void
genRightShiftLiteral (operand *left, operand *right, operand *result, const iCode *ic)
{
  int shCount = (int) ulFromVal (right->aop->aopu.aop_lit);
  int size, i;
  bool sign;
  bool xh_zero, yh_zero, xl_free, yl_free;

  struct asmop shiftop_impl;
  struct asmop *shiftop;

  D (emit2 ("; genRightShiftLiteral", ""));

  size = getSize (operandType (result));

  sign = !SPEC_USIGN (getSpec (operandType (left)));

  /* I suppose that the left size >= result size */
  wassert ((int) getSize (operandType (left)) >= size);

  aopOp (left, ic);
  aopOp (result, ic);

  if (shCount > (size * 8))
    shCount = size * 8;

  if (!sign && shCount >= (size * 8))
    {
      genMove(result->aop, ASMOP_ZERO, regDead (A_IDX, ic), regDead (X_IDX, ic), regDead (Y_IDX, ic));
      /* shiftop must be valid for the genMove at release. */
      shiftop = result->aop;
      goto release;
    }

  wassertl (size <= 2 || shCount % 8 <= 1 + (size <= 4) || size == 4 && (shCount <= 10 || shCount >= 16),
    "Shifting of longs and long longs by non-trivial values should be handled by generic function.");

  if ((shCount < 8 || sign) && aopRS (left->aop) && aopRS (result->aop))
    {
      shiftop = &shiftop_impl;
      init_shiftop (shiftop, result->aop, left->aop, right->aop, ic, FALSE);

      genMove (shiftop, left->aop, regDead (A_IDX, ic), regDead (X_IDX, ic), regDead (Y_IDX, ic));
    }
  else if (sign ||
    shCount >= 12 && aopInReg (result->aop, 0, X_IDX) && aopInReg (left->aop, 0, X_IDX) && regDead (Y_IDX, ic)) // Use divw, see below.
    {
      genMove (result->aop, left->aop, regDead (A_IDX, ic), regDead (X_IDX, ic), regDead (Y_IDX, ic));
      shiftop = result->aop;
    }
  else // Top bytes will be zero.
    {
      genMove_o (result->aop, 0, left->aop, shCount / 8, size, regDead (A_IDX, ic), regDead (X_IDX, ic), regDead (Y_IDX, ic));
      size -= shCount / 8;
      shCount %= 8;
      shiftop = result->aop;
    }

  if (!shCount)
    goto release;

  /* xh/yh hold a byte at or beyond the (possibly reduced) size, i.e. one of the zero top bytes. */
  xh_zero = shiftop->regs[XH_IDX] >= size;
  yh_zero = shiftop->regs[YH_IDX] >= size;
  xl_free = regDead (XL_IDX, ic) && shiftop->regs[XL_IDX] < 0;
  yl_free = regDead (YL_IDX, ic) && shiftop->regs[YL_IDX] < 0;

  // Use swap a where beneficial.
  if (!sign && size == 1 && aopRS (shiftop) && !aopOnStack (shiftop, 0, 1) &&
    !(aopInReg (shiftop, 0, XL_IDX) && xh_zero) &&
    !(aopInReg (shiftop, 0, YL_IDX) && yh_zero && shCount <= 3))
    {
      swap_to_a (shiftop->aopu.bytes[0].byteu.reg->rIdx);
      if (shCount >= 4)
        {
          emit3 (A_SWAP, ASMOP_A, 0);
          emit2 ("and", "a, #0x0f");
          cost (2, 1);
          shCount -= 4;
        }
      while (shCount--)
        emit3 (A_SRL, ASMOP_A, 0);
      swap_from_a (shiftop->aopu.bytes[0].byteu.reg->rIdx);
      goto release;
    }

  // div can be cheaper than a sequence of shifts.
  if (!sign && shCount < 8 &&
    (shCount > 3 + !regDead (A_IDX, ic) * 2 && (size == 2 && aopInReg (shiftop, 0, X_IDX) || size == 1 && aopInReg (shiftop, 0, XL_IDX) && xh_zero) ||
    shCount * 2 > 4 + !regDead (A_IDX, ic) * 2 && (size == 2 && aopInReg (shiftop, 0, Y_IDX) || size == 1 && aopInReg (shiftop, 0, YL_IDX) && yh_zero)))
    {
      /* NOTE(review): in_y only tests the 2-byte y case; for the size == 1 /
         yl case it evaluates false and "x, a" is emitted - confirm. */
      const bool in_y = aopInReg (shiftop, 0, Y_IDX);
      if (!regDead (A_IDX, ic))
        push (ASMOP_A, 0, 1);
      emit2 ("ld", "a, #0x%02x", 1 << shCount);
      cost (2, 1);
      emit2 ("div", in_y ? "y, a" : "x, a");
      cost (1 + in_y, 17); // TODO: Find out exact value, replace 17 by exact value, and accordingly choose this optimization depending on optimization goal.
      if (!regDead (A_IDX, ic))
        pop (ASMOP_A, 0, 1);
      goto release;
    }

  // divw can be cheaper than a sequence of shifts.
  if (!sign && size == 2 && shCount > 5 && regDead (Y_IDX, ic) && aopInReg (shiftop, 0, X_IDX))
    {
      emit2 ("ldw", "y, #0x%04x", 1 << shCount);
      cost (4, 2);
      emit2 ("divw", "x, y");
      cost (1, 17); // TODO: Find out exact value, replace 17 by exact value, and accordingly choose this optimization depending on optimization goal.
      goto release;
    }

  // Testing and rlwa is cheaper than 8 times sraw
  if (sign && shCount >= (7 - regDead (A_IDX, ic)) && size >= 2 && (aopInReg (shiftop, size - 2, X_IDX) || aopInReg (shiftop, size - 2, Y_IDX)) &&
    (size == 2 || size == 3 && shCount >= 8 && aopInReg (shiftop, 0, A_IDX) || size == 4 && (aopInReg (shiftop, 0, X_IDX) || aopInReg (shiftop, 0, Y_IDX))))
    {
      bool pushed_sign = false;

      if (!regDead (A_IDX, ic))
        push (ASMOP_A, 0, 1);

      /* Build the sign-extension byte in a: 0x00 for non-negative, 0xff (via dec) for negative. */
      symbol *tlbl = (regalloc_dry_run ? 0 : newiTempLabel (NULL));
      emit3 (A_CLR, ASMOP_A, 0);
      emit3w_o (A_TNZW, shiftop, size - 2, 0, 0);
      if (tlbl)
        emit2 ("jrpl", "!tlabel", labelKey2num (tlbl->key));
      emit3 (A_DEC, ASMOP_A, 0);
      cost (2, 0);
      emitLabel (tlbl);

      if (shCount >= 8 + 6)
        {
          push (ASMOP_A, 0, 1);
          pushed_sign = true;
        }
      /* Each pass rotates by 8 bits; an overshoot is corrected by the left-shift loop below. */
      while (shCount >= 6)
        {
          emit3w_o (A_RRWA, shiftop, size - 2, 0, 0);
          if (size >= 4)
            emit3w_o (A_RRWA, shiftop, 0, 0, 0);
          shCount -= 8;
          if (shCount >= 6)
            {
              emit2 ("ld", "a, (1, sp)");
              cost (2, 1);
            }
        }
      for (; shCount < 0; shCount++)
        {
          wassert (aopInReg (shiftop, 0, X_IDX) || aopInReg (shiftop, 0, Y_IDX));
          emit3 (A_SLL, ASMOP_A, 0);
          emit3w_o (A_RLCW, shiftop, 0, 0, 0);
          if (size >= 4)
            {
              wassert (aopInReg (shiftop, 2, X_IDX) || aopInReg (shiftop, 2, Y_IDX));
              emit3w_o (A_RLCW, shiftop, 2, 0, 0);
            }
        }
      if (pushed_sign)
        pop (ASMOP_A, 0, 1);

      if (!regDead (A_IDX, ic))
        pop (ASMOP_A, 0, 1);
    }

  // Shifting right by 8, then shifting left a bit can be cheaper than shifting right all the way.
  if (size == 4 && !sign && (shCount == 7 || shCount == 6) &&
    (aopInReg (shiftop, 0, X_IDX) || aopInReg (shiftop, 0, Y_IDX)) &&
    (aopInReg (shiftop, 2, X_IDX) || aopInReg (shiftop, 2, Y_IDX)))
    {
      if (!regDead (A_IDX, ic))
        push (ASMOP_A, 0, 1);
      emit3 (A_CLR, ASMOP_A, 0);
      emit3w_o (A_RRWA, shiftop, 2, 0, 0);
      emit3w_o (A_RRWA, shiftop, 0, 0, 0);
      shCount -= 8;
      for (; shCount < 0; shCount++)
        {
          emit3 (A_SLL, ASMOP_A, 0);
          emit3w_o (A_RLCW, shiftop, 0, 0, 0);
          emit3w_o (A_RLCW, shiftop, 2, 0, 0);
        }
      if (!regDead (A_IDX, ic))
        pop (ASMOP_A, 0, 1);
    }

  /* Generic path: one pass from the top byte down per remaining count
     (shCount is 0 here if one of the shortcuts above consumed it). */
  while (shCount--)
    for (i = size - 1; i >= 0;)
      {
        if (i > 0 && (aopInReg (shiftop, i - 1, X_IDX) || aopInReg (shiftop, i - 1, Y_IDX)))
          {
            emit3w_o ((i != size - 1) ? A_RRCW : (sign ? A_SRAW : A_SRLW), shiftop, i - 1, 0, 0);
            i -= 2;
          }
        else if (!sign && i == size - 1 && (aopInReg (shiftop, i, XL_IDX) && xh_zero || aopInReg (shiftop, i, YL_IDX) && yh_zero)) // Skipped top byte, but 16-bit shift is cheaper than going through a and doing an 8-bit shift there.
          {
            emit3w (A_SRLW, aopInReg (shiftop, i, XL_IDX) ? ASMOP_X : ASMOP_Y, 0);
            i--;
          }
        else if (i == 0 && (aopInReg (shiftop, i, XH_IDX) && xl_free || aopInReg (shiftop, i, YH_IDX) && yl_free)) // 16-bit shift is cheaper than going through a and doing an 8-bit shift there.
          {
            const bool in_x = aopInReg (shiftop, i, XH_IDX);
            emit3w ((i != size - 1) ? A_RRCW : (sign ? A_SRAW : A_SRLW), in_x ? ASMOP_X : ASMOP_Y, 0);
            i--;
          }
        else
          {
            int swapidx = -1;
            if (aopRS (shiftop) && !aopInReg (shiftop, i, A_IDX) && shiftop->aopu.bytes[i].in_reg)
              swapidx = shiftop->aopu.bytes[i].byteu.reg->rIdx;

            if (swapidx == -1)
              emit3_o ((i != size - 1) ? A_RRC : (sign ? A_SRA : A_SRL), shiftop, i, 0, 0);
            else
              {
                swap_to_a (swapidx);
                emit3 ((i != size - 1) ? A_RRC : (sign ? A_SRA : A_SRL), ASMOP_A, 0);
                swap_from_a (swapidx);
              }

            i--;
          }
      }

release:
  genMove (result->aop, shiftop, regDead (A_IDX, ic), regDead (X_IDX, ic), regDead (Y_IDX, ic));

  freeAsmop (left);
  freeAsmop (result);
}

/*-----------------------------------------------------------------*/
/* genRightShift - generates code for right shifting */
/*-----------------------------------------------------------------*/
static void
genRightShift (const iCode *ic)
{
  operand *left, *right, *result;
  int i, size;
  bool pushed_a = false;
  symbol *tlbl1, *tlbl2;
  bool sign;
  unsigned int iterations;
  int skip_bytes = 0;

  struct asmop shiftop_impl;
  struct asmop *shiftop;

  right = IC_RIGHT (ic);
  left = IC_LEFT (ic);
  result = IC_RESULT (ic);

  aopOp (right, ic);

  sign = !SPEC_USIGN (getSpec (operandType (left)));

  /* if the shift count is known then do it
     as efficiently as possible */
  if (right->aop->type == AOP_LIT &&
    ((getSize (operandType (result)) <= 2) ||
    (!sign && ulFromVal (right->aop->aopu.aop_lit) % 8 <= (getSize (operandType (result)) <= 4 ? 2ul : 1ul)) ||
    (getSize (operandType (result)) <= 4 && ulFromVal (right->aop->aopu.aop_lit) <= 10) ||
    (getSize (operandType (result)) <= 4 && ulFromVal (right->aop->aopu.aop_lit) >= 16) ||
    (!sign && ulFromVal (right->aop->aopu.aop_lit) >= getSize (operandType (result)) * 8) ) )
    {
      genRightShiftLiteral (left, right, result, ic);
      freeAsmop (right);
      return;
    }

  D (emit2 ("; genRightShift", ""));

  aopOp (result, ic);
  aopOp (left, ic);

  if (!regDead (A_IDX, ic))
    {
      push (ASMOP_A, 0, 1);
      pushed_a = true;
    }

  if ((aopRS (left->aop) || left->aop->type == AOP_LIT) && aopRS (result->aop))
    {
      shiftop = &shiftop_impl;
      init_shiftop (shiftop, result->aop, left->aop, right->aop, ic, false);
    }
  else
    shiftop = result->aop;

  iterations = (right->aop->type == AOP_LIT ?
byteOfVal (right->aop->aopu.aop_lit, 0) : 2); + + if (right->aop->type == AOP_LIT && !sign) + { + skip_bytes = iterations / 16 * 2; + genMove_o (shiftop, 0, left->aop, skip_bytes, shiftop->size, right->aop->regs[A_IDX] < 0, regDead (X_IDX, ic) && right->aop->regs[XL_IDX] < 0 && right->aop->regs[XH_IDX] < 0, regDead (Y_IDX, ic) && right->aop->regs[YL_IDX] < 0 && right->aop->regs[YH_IDX] < 0); + iterations %= 16; + } + else // TODO: What if shiftop and right operand overlap on stack? + genMove (shiftop, left->aop, right->aop->regs[A_IDX] < 0, regDead (X_IDX, ic) && right->aop->regs[XL_IDX] < 0 && right->aop->regs[XH_IDX] < 0, regDead (Y_IDX, ic) && right->aop->regs[YL_IDX] < 0 && right->aop->regs[YH_IDX] < 0); + + size = shiftop->size; + + for (i = 0; i < size; i++) + { + if (aopRS (shiftop) && (!aopInReg (shiftop, i, A_IDX) || aopInReg (right->aop, 0, A_IDX)) && shiftop->aopu.bytes[i].in_reg && + right->aop->regs[shiftop->aopu.bytes[i].byteu.reg->rIdx] == 0) + { + if (!regalloc_dry_run) + wassertl (0, "Overwriting shift count"); + cost (380, 380); + } + if (aopInReg (shiftop, i, A_IDX) && !pushed_a) + { + push (ASMOP_A, 0, 1); + pushed_a = true; + } + } + + tlbl1 = (regalloc_dry_run ? 0 : newiTempLabel (0)); + tlbl2 = (regalloc_dry_run ? 0 : newiTempLabel (0)); + + // Get shift count into a. 
+ if (right->aop->type == AOP_LIT) + { + if (!iterations) + goto postshift; + emit2 ("ld", "a, #0x%02x", iterations); + cost (2, 1); + } + else + cheapMove (ASMOP_A, 0, right->aop, 0, false); + + if (right->aop->type != AOP_LIT || aopIsLitVal (right->aop, 0, 1, 0)) + { + if (!aopOnStack (right->aop, 0, 1) && right->aop->type != AOP_DIR) + emit3 (A_TNZ, ASMOP_A, 0); + if (tlbl2) + emit2 ("jreq", "!tlabel", labelKey2num (tlbl2->key)); + cost (2, 0); + } + + emitLabel (tlbl1); + + regalloc_dry_run_cycle_scale = iterations; + for (i = size - 1 - skip_bytes; i >= 0;) + { + int swapidx = -1; + + if (i > 0 && (aopInReg (shiftop, i - 1, X_IDX) || aopInReg (shiftop, i - 1, Y_IDX))) + { + emit3w_o ((i != size - 1 - skip_bytes) ? A_RRCW : (sign ? A_SRAW : A_SRLW), shiftop, i - 1, 0, 0); + i -= 2; + continue; + } + else if (aopInReg (shiftop, i, A_IDX)) + { + emit2 ((i != size - 1 - skip_bytes) ? "rrc" : (sign ? "sra" : "srl"), "(1, sp)"); + cost (2, 1); + i--; + continue; + } + + if (aopRS (shiftop) && !aopInReg (shiftop, i, A_IDX) && shiftop->aopu.bytes[i].in_reg) + swapidx = shiftop->aopu.bytes[i].byteu.reg->rIdx; + + if (swapidx == -1) + emit3_o ((i != size - 1 - skip_bytes) ? A_RRC : (sign ? A_SRA : A_SRL), shiftop, i, 0, 0); + else + { + swap_to_a (swapidx); + emit3 ((i != size - 1 - skip_bytes) ? A_RRC : (sign ? 
A_SRA : A_SRL), ASMOP_A, 0); + swap_from_a (swapidx); + } + i--; + } + emit3 (A_DEC, ASMOP_A, 0); + regalloc_dry_run_cycle_scale = 1; + + if (tlbl1) + emit2 ("jrne", "!tlabel", labelKey2num (tlbl1->key)); + cost (2, (iterations - 1) * 2 + 1); + emitLabel (tlbl2); + +postshift: + if(!regDead (A_IDX, ic)) + { + genMove (result->aop, shiftop, regDead (A_IDX, ic) || pushed_a, regDead (X_IDX, ic), regDead (Y_IDX, ic)); + if (pushed_a) + pop (ASMOP_A, 0, 1); + } + else + { + if (pushed_a) + pop (ASMOP_A, 0, 1); + genMove (result->aop, shiftop, false, regDead (X_IDX, ic), regDead (Y_IDX, ic)); + } + + freeAsmop (left); + freeAsmop (result); + freeAsmop (right); +} + +/*------------------------------------------------------------------*/ +/* init_stackop - initalize asmop for stack location */ +/*------------------------------------------------------------------*/ +static void init_stackop (asmop *stackop, int size, long int stk_off) +{ + stackop->size = size; + stackop->regs[A_IDX] = -1; + stackop->regs[XL_IDX] = -1; + stackop->regs[XH_IDX] = -1; + stackop->regs[YL_IDX] = -1; + stackop->regs[YH_IDX] = -1; + + for (int i = 0; i < size; i++) + { + stackop->aopu.bytes[i].in_reg = false; + stackop->aopu.bytes[i].byteu.stk = stk_off + stackop->size - i - 1; + } + + stackop->type = AOP_STK; +} + +/*-----------------------------------------------------------------*/ +/* genPointerGet - generate code for pointer get */ +/*-----------------------------------------------------------------*/ +static void +genPointerGet (const iCode *ic) +{ + operand *result = IC_RESULT (ic); + operand *left = IC_LEFT (ic); + operand *right = IC_RIGHT (ic); + int size, i; + unsigned offset; + bool use_y; + bool pushed_x = false; + bool pushed_a = false; + int blen, bstr; + bool bit_field = IS_BITVAR (getSpec (operandType (result))); + symbol *const tlbl = ((regalloc_dry_run || !bit_field) ? 0 : newiTempLabel (NULL)); + + blen = bit_field ? 
SPEC_BLEN (getSpec (operandType (result))) : 0; + bstr = bit_field ? SPEC_BSTR (getSpec (operandType (result))) : 0; + + D (emit2 ("; genPointerGet", "")); + + aopOp (IC_LEFT (ic), ic); + aopOp (IC_RIGHT (ic), ic); + aopOp (IC_RESULT (ic), ic); + + if (result->aop->type == AOP_DUMMY) + D (emit2 ("; Dummy read", "")); + + wassertl (right, "GET_VALUE_AT_ADDRESS without right operand"); + wassertl (IS_OP_LITERAL (IC_RIGHT (ic)), "GET_VALUE_AT_ADDRESS with non-literal right operand"); + + size = result->aop->size; + + // todo: What if right operand is negative? + offset = byteOfVal (right->aop->aopu.aop_lit, 1) * 256 + byteOfVal (right->aop->aopu.aop_lit, 0); + + // Long pointer indirect long addressing mode is useful only in one very specific case: + if (!bit_field && size == 1 && !offset && left->aop->type == AOP_DIR && !regDead (X_IDX, ic) && regDead (A_IDX, ic)) + { + emit2 ("ld", "a, [%s]", aopGet2(left->aop, 0)); + cost (4, 4); + cheapMove (result->aop, 0, ASMOP_A, 0, false); + goto release; + } + // Special case for remat pointer to on-stack object. + else if (!bit_field && left->aop->type == AOP_STL) + { + struct asmop stackop_impl; + init_stackop (&stackop_impl, result->aop->size, left->aop->aopu.stk_off + (long)offset); + genMove(result->aop, &stackop_impl, regDead (A_IDX, ic), regDead (X_IDX, ic), regDead (Y_IDX, ic)); + goto release; + } + // Special case for efficient handling of 8-bit I/O and rematerialized pointers + else if (!bit_field && size == 1 && (left->aop->type == AOP_LIT || left->aop->type == AOP_IMMD) + && regDead (A_IDX, ic)) + { + if (left->aop->type == AOP_LIT) + emit2("ld", offset ? "a, 0x%02x%02x+%d" : "a, 0x%02x%02x", byteOfVal (left->aop->aopu.aop_lit, 1), byteOfVal (left->aop->aopu.aop_lit, 0), offset); + else + emit2("ld", offset ? 
"a, %s+%d" : "a, %s+%d", left->aop->aopu.immd, left->aop->aopu.immd_off + offset); + cost (3, 1); + cheapMove (result->aop, 0, ASMOP_A, 0, FALSE); + goto release; + } + // Special case for efficient handling of 16-bit I/O and rematerialized pointers + else if (!bit_field && size == 2 && (left->aop->type == AOP_LIT || left->aop->type == AOP_IMMD) && + (aopInReg (result->aop, 0, X_IDX) || aopInReg (result->aop, 0, Y_IDX) || aopOnStack (result->aop, 0, 2) && regDead (X_IDX, ic))) + { + bool use_y = aopInReg (result->aop, 0, Y_IDX); + if (left->aop->type == AOP_LIT) + emit2("ldw", offset ? "%s, 0x%02x%02x+%d" : "%s, 0x%02x%02x", use_y ? "y" : "x", byteOfVal (left->aop->aopu.aop_lit, 1), byteOfVal (left->aop->aopu.aop_lit, 0), offset); + else + emit2("ldw", offset ? "%s, %s+%d" : "%s, %s+%d", use_y ? "y" : "x", left->aop->aopu.immd, left->aop->aopu.immd_off + offset); + cost (3 + use_y, 2); + genMove (result->aop, use_y ? ASMOP_Y : ASMOP_X, regDead (A_IDX, ic), regDead (X_IDX, ic), regDead (Y_IDX, ic)); + goto release; + } + + // todo: Handle this more gracefully, save x instead of using y. + use_y = (aopInReg (left->aop, 0, Y_IDX) && size <= 1 + aopInReg (result->aop, 0, Y_IDX)) || + !(regDead (X_IDX, ic) || aopInReg (left->aop, 0, X_IDX)) || + !bit_field && size == 2 && aopInReg (result->aop, 0, Y_IDX) && aopInReg (left->aop, 0, X_IDX) && !regDead (X_IDX, ic); + + if (use_y ? !(regDead (Y_IDX, ic) || aopInReg (left->aop, 0, Y_IDX)) : !(regDead (X_IDX, ic) || aopInReg (left->aop, 0, X_IDX))) // Preferred index register is not free. + { + // Try to free an index register. + if (result->aop->regs[XL_IDX] < 0 && result->aop->regs[XH_IDX] < 0) + { + push (ASMOP_X, 0, 2); + pushed_x = true; + use_y = false; + } + else + { + if (!regalloc_dry_run) + wassertl (0, use_y ? "No free reg y for pointer." 
: "No free reg x for pointer."); + cost (180, 180); + goto release; + } + } + + if (left->aop->type == AOP_STL) + { + emit2 ("ldw", "x, sp"); + emit2 ("addw", "x, #%ld", (long)(left->aop->aopu.stk_off) + G.stack.pushed ); + cost (4, 3); + } + else + genMove (use_y ? ASMOP_Y : ASMOP_X, left->aop, FALSE, regDead (X_IDX, ic), regDead (Y_IDX, ic)); + + if (floatFromVal (right->aop->aopu.aop_lit) < 0.0) + { + emit2 ("addw", use_y ? "y, #0x%x" : "x, #0x%x", offset); + offset = 0; + cost (use_y ? 4 : 3, 2); + } + + // Get all the bytes. todo: Get the byte in a last (if not a bit-field), so we do not need to save a. + for (i = 0; !bit_field ? i < size : blen > 0; i++, blen -= 8) + { + int o = (bit_field ? i : size - 1 - i) + offset; + + if (!bit_field && i + 2 == size && !aopInReg (result->aop, i, A_IDX) && !aopInReg (result->aop, i + 1, A_IDX) && + (result->aop->regs[use_y ? YL_IDX : XL_IDX] < 0 || result->aop->regs[use_y ? YL_IDX : XL_IDX] >= i) && (result->aop->regs[use_y ? YH_IDX : XH_IDX] < 0 || result->aop->regs[use_y ? YH_IDX : XH_IDX] >= i) && regDead (use_y ? Y_IDX : X_IDX, ic)) + { + o--; + if (!o) + emit2 ("ldw", use_y ? "y, (y)" : "x, (x)"); + else + emit2 ("ldw", use_y ? "y, (0x%x, y)" : "x, (0x%x, x)", o); + cost (1 + use_y + (o > 0) + (o > 256), 2); + + genMove_o (result->aop, i, use_y ? ASMOP_Y : ASMOP_X, 0, 2, regDead (A_IDX, ic) && (result->aop->regs[A_IDX] < 0 || result->aop->regs[A_IDX] >= i) || pushed_a, FALSE, FALSE); // todo: Allow more. 
+ + i++, blen -= 8; + continue; + } + else if (!bit_field && !use_y && + (aopInReg (result->aop, i, Y_IDX) || aopOnStackNotExt (result->aop, i, 2) && regDead (Y_IDX, ic) && result->aop->regs[YL_IDX] < 0 && result->aop->regs[YH_IDX] < 0 && i + 3 < size && !optimize.codeSpeed)) + { + o--; + + emit2 ("ldw", "y, x"); + if (!o) + emit2 ("ldw", "y, (y)"); + else + emit2 ("ldw", "y, (0x%x, y)", o); + cost (4 + (o > 0) + (o > 256), 3); + genMove_o (result->aop, i, ASMOP_Y, 0, 2, pushed_a, false, true); + + i++, blen -= 8; + continue; + } + + if (!pushed_a && (!regDead (A_IDX, ic) || result->aop->regs[A_IDX] >= 0 && result->aop->regs[A_IDX] < i)) + { + push (ASMOP_A, 0, 1); + pushed_a = TRUE; + } + + if (!o) + { + emit2 ("ld", use_y ? "a, (y)" : "a, (x)"); + cost (1 + use_y, 1); + } + else + { + emit2 ("ld", use_y ? "a, (0x%x, y)" : "a, (0x%x, x)", o); + cost ((o < 256 ? 2 : 3) + use_y, 1); + } + + if (bit_field && blen < 8 && !i) // The only byte might need shifting. + { + if (bstr >= 4) + { + emit3 (A_SWAP, ASMOP_A, 0); + bstr -= 4; + } + while (bstr--) + emit3 (A_SRL, ASMOP_A, 0); + } + if (bit_field && blen < 8) // The partial byte. + { + emit2 ("and", "a, #0x%02x", 0xff >> (8 - blen)); + cost (2, 1); + } + + if (bit_field && blen <= 8 && !SPEC_USIGN (getSpec (operandType (result)))) // Sign extension for partial byte of signed bit-field + { + emit2 ("bcp", "a, #0x%02x", 0x80 >> (8 - blen)); + cost (2, 1); + if (tlbl) + emit2 ("jreq", "!tlabel", labelKey2num (tlbl->key)); + cost (2, 0); + emit2 ("or", "a, #0x%02x", (0xff00 >> (8 - blen)) & 0xff); + cost (2, 1); + emitLabel (tlbl); + } + + if (result->aop->type == AOP_DUMMY) + continue; + + cheapMove (result->aop, i, ASMOP_A, 0, FALSE); + + if (i < size - 1 && (use_y ? 
aopInReg (result->aop, i, YL_IDX) || aopInReg (result->aop, i, YH_IDX) : aopInReg (result->aop, i, XL_IDX) || aopInReg (result->aop, i, XH_IDX))) + { + if (!regalloc_dry_run) + wassertl (0, "Overwriting pointer"); + cost (180, 180); + } + } + + if (pushed_a) + pop (ASMOP_A, 0, 1); + + if (bit_field && i < size) + { + if (SPEC_USIGN (getSpec (operandType (result)))) + genMove_o (result->aop, i, ASMOP_ZERO, 0, bit_field ? i : size - i - 1, FALSE, FALSE, FALSE); + else + wassertl (0, "Unimplemented multibyte sign extension for bit-field."); + } + +release: + if (pushed_x) + pop (ASMOP_X, 0, 2); + + freeAsmop (right); + freeAsmop (left); + freeAsmop (result); +} + +/*-----------------------------------------------------------------*/ +/* genAssign - generate code for assignment */ +/*-----------------------------------------------------------------*/ +static void +genAssign (const iCode *ic) +{ + operand *result, *right; + + D (emit2 ("; genAssign", "")); + + result = IC_RESULT (ic); + right = IC_RIGHT (ic); + + aopOp (right, ic); + aopOp (result, ic); + + wassert (result->aop->type != AOP_DUMMY || right->aop->type != AOP_DUMMY); + + if (right->aop->type == AOP_DUMMY) + { + int i; + D (emit2 ("; Dummy write", "")); + for (i = 0; i < result->aop->size; i++) + cheapMove (result->aop, i, ASMOP_A, 0, TRUE); + } + else if (result->aop->type == AOP_DUMMY) + { + int i; + D (emit2 ("; Dummy read", "")); + + if (!regDead(A_IDX, ic) && right->aop->type == AOP_DIR) + for (i = 0; i < right->aop->size; i++) + emit3_o (A_TNZ, right->aop, i, 0, 0); + else + { + if (!regDead(A_IDX, ic)) + push (ASMOP_A, 0, 1); + for (i = 0; i < right->aop->size; i++) + cheapMove (ASMOP_A, 0, right->aop, i, FALSE); + if (!regDead(A_IDX, ic)) + pop (ASMOP_A, 0, 1); + } + } + else + genMove(result->aop, right->aop, regDead (A_IDX, ic), regDead (X_IDX, ic), regDead (Y_IDX, ic)); + + wassert (result->aop != right->aop); + freeAsmop (right); + freeAsmop (result); +} + 
+/*-----------------------------------------------------------------*/ +/* genPointerSet - stores the value into a pointer location */ +/*-----------------------------------------------------------------*/ +static void +genPointerSet (iCode *ic) +{ + operand *left = IC_LEFT (ic); + operand *right = IC_RIGHT (ic); + int size, i, j; + bool use_y; + int pushed_a = 0; + int blen, bstr; + bool bit_field = IS_BITVAR (getSpec (operandType (right))) || IS_BITVAR (getSpec (operandType (left))); + int cache_l = -1, cache_h = -1/*, cache_a = -1*/; + + blen = bit_field ? (SPEC_BLEN (getSpec (operandType (IS_BITVAR (getSpec (operandType (right))) ? right : left)))) : 0; + bstr = bit_field ? (SPEC_BSTR (getSpec (operandType (IS_BITVAR (getSpec (operandType (right))) ? right : left)))) : 0; + + D (emit2 ("; genPointerSet", "")); + + aopOp (left, ic); + aopOp (right, ic); + + size = right->aop->size; + + // In some cases a sequence of mov instructions is more efficient. + if (!bit_field && (left->aop->type == AOP_LIT || left->aop->type == AOP_IMMD) && (right->aop->type == AOP_DIR || right->aop->type == AOP_LIT || right->aop->type == AOP_IMMD)) + { + // First, make an estimate to find out if it is worth it (estimate not exact, could be improved a bit, probably not worth it since left type is uncommon) + const int mov_size = size * (right->aop->type == AOP_DIR ? 3 : 4); + const int mov_cycles = size * 1; + int normal_size = 3; + int normal_cycles = 2; + bool needs_a = false; + + for (i = 0; i < size;) + { + if (aopIsLitVal (right->aop, i, 1, 0)) // clr (x) + { + normal_size += i ? 2 : 1; + normal_cycles += 1; + i++; + } + else if (i + 1 < size) // ld y, . followed by ldw (x), y + { + normal_size += (i ? 6 : 5) - (right->aop->type == AOP_DIR); + normal_cycles += 3; + i += 2; + } + else // ld a, . followed by ldw (x), a + { + needs_a = true; + normal_size += i ? 
4 : 3; + normal_cycles += 2; + i++; + } + } + + if (!regDead (X_IDX, ic)) + { + normal_size += 2; + normal_cycles += 2; + } + if (needs_a && !regDead (A_IDX, ic)) + { + normal_size += 2; + normal_cycles += 2; + } + + if ((mov_size <= normal_size || optimize.codeSpeed) && (mov_cycles <= normal_cycles || optimize.codeSize)) + { + for (i = 0; i < size; i++) + { + if (left->aop->type == AOP_LIT) + emit2 ("mov", "0x%02x%02x+%d, %s", byteOfVal (left->aop->aopu.aop_lit, 1), byteOfVal (left->aop->aopu.aop_lit, 0), size - i - 1, aopGet (right->aop, i)); + else + emit2 ("mov", "%s+%d, %s", left->aop->aopu.immd, left->aop->aopu.immd_off + size - i - 1, aopGet (right->aop, i)); + cost (right->aop->type == AOP_DIR ? 3 : 4, 1); + } + goto release; + } + } + + // Use bset / bres. + if (bit_field && blen == 1 && (left->aop->type == AOP_LIT || left->aop->type == AOP_IMMD) && right->aop->type == AOP_LIT) + { + const char *inst = (byteOfVal (right->aop->aopu.aop_lit, 0) & 1) ? "bset" : "bres"; + if (left->aop->type == AOP_LIT) + emit2 (inst, "0x%02x%02x, #%u", byteOfVal (left->aop->aopu.aop_lit, 1), byteOfVal (left->aop->aopu.aop_lit, 0), bstr); + else + emit2 (inst, "%s+%d, #%u", left->aop->aopu.immd, left->aop->aopu.immd_off, bstr); + cost (4, 1); + goto release; + } + // Use bccm + if (bit_field && blen == 1 && (left->aop->type == AOP_LIT || left->aop->type == AOP_IMMD)) + { + if (!regDead (A_IDX, ic)) + push (ASMOP_A, 0, 1); + cheapMove (ASMOP_A, 0, right->aop, 0, false); + emit3(A_SRL, ASMOP_A, 0); + if (!regDead (A_IDX, ic)) + pop (ASMOP_A, 0, 1); + emit2 ("bccm", "%s+%d, #%u", left->aop->aopu.immd, left->aop->aopu.immd_off, bstr); + cost (4, 1); + goto release; + } + + if (!bit_field && size == 1 && (left->aop->type == AOP_LIT || left->aop->type == AOP_IMMD) && aopInReg(right->aop, 0, A_IDX)) + { + if (left->aop->type == AOP_LIT) + emit2 ("ld", "0x%02x%02x, %s", byteOfVal (left->aop->aopu.aop_lit, 1), byteOfVal (left->aop->aopu.aop_lit, 0), aopGet (right->aop, 0)); + else + 
emit2 ("ld", "%s+%d, %s", left->aop->aopu.immd, left->aop->aopu.immd_off, aopGet (right->aop, 0)); + cost (3, 1); + goto release; + } + if (!bit_field && size == 2 && (left->aop->type == AOP_LIT || left->aop->type == AOP_IMMD) && (aopInReg(right->aop, 0, X_IDX) || aopInReg(right->aop, 0, Y_IDX))) + { + if (left->aop->type == AOP_LIT) + emit2 ("ldw", "0x%02x%02x, %s", byteOfVal (left->aop->aopu.aop_lit, 1), byteOfVal (left->aop->aopu.aop_lit, 0), aopGet2 (right->aop, 0)); + else + emit2 ("ldw", "%s+%d, %s", left->aop->aopu.immd, left->aop->aopu.immd_off, aopGet2 (right->aop, 0)); + cost (3 + aopInReg(right->aop, 0, Y_IDX), 2); + goto release; + } + + // Long pointer indirect long addressing mode is useful only in two very specific cases: + if (!bit_field && size == 1 && left->aop->type == AOP_DIR && !regDead (X_IDX, ic) && (aopInReg(right->aop, 0, A_IDX) || regDead (A_IDX, ic))) + { + emit2("ld", "[%s], a", aopGet2 (left->aop, 0)); + cost (4, 4); + goto release; + } + else if (!bit_field && size == 2 && left->aop->type == AOP_DIR && (!regDead (Y_IDX, ic) || !optimize.codeSpeed) && aopInReg(right->aop, 0, X_IDX)) + { + emit2("ldw", "[%s], x", aopGet2 (left->aop, 0)); + cost (4, 5); + goto release; + } + + // Rematerialized pointer to on-stack object. + if (!bit_field && left->aop->type == AOP_STL) + { + struct asmop stackop_impl; + init_stackop (&stackop_impl, size, left->aop->aopu.stk_off); + genMove(&stackop_impl, right->aop, regDead (A_IDX, ic), regDead (X_IDX, ic), regDead (Y_IDX, ic)); + goto release; + } + + // todo: Handle this more gracefully, save x instead of using y, when doing so is more efficient. + use_y = (aopInReg (left->aop, 0, Y_IDX) && size <= 1 + aopInReg (right->aop, 0, X_IDX)) || regDead (Y_IDX, ic) && (!(regDead (X_IDX, ic) || aopInReg (left->aop, 0, X_IDX)) || right->aop->regs[XL_IDX] >= 0 || right->aop->regs[XH_IDX] >= 0); + + if (!(regDead (use_y ? Y_IDX : X_IDX, ic) || aopInReg (left->aop, 0, use_y ? 
Y_IDX : X_IDX)) || right->aop->regs[use_y ? YL_IDX : XL_IDX] >= 0 || right->aop->regs[use_y ? YH_IDX : XH_IDX] >= 0) + { + if (!regalloc_dry_run) + wassertl (0, "No free reg for pointer."); + + cost (180, 180); + goto release; + } + + if (left->aop->type == AOP_STL) + { + emit2 ("ldw", "x, sp"); + emit2 ("addw", "x, #%ld", (long)(left->aop->aopu.stk_off) + G.stack.pushed ); + cost (4, 3); + } + else + genMove (use_y ? ASMOP_Y : ASMOP_X, left->aop, regDead (A_IDX, ic) && !aopInReg (right->aop, 0, A_IDX), regDead (X_IDX, ic), regDead (Y_IDX, ic)); + + for (i = 0; !bit_field ? i < size : blen > 0; i++, blen -= 8) + { + if (!bit_field && aopIsLitVal (right->aop, i, 1, 0) && + !(!use_y && i + 1 < size && optimize.codeSize && regDead (Y_IDX, ic) && right->aop->type == AOP_LIT && (size - 2 - i || !cache_l && !cache_h))) // clrw y, ldw (d, x), y is cheaper than this. ldw (x), y is cheaper than this if y is zero. + { + if (!(size - 1 - i)) + emit2 ("clr", use_y ? "(y)" : "(x)"); + else + emit2 ("clr", use_y ? "(0x%x, y)" : "(0x%x, x)", size - 1 - i); + cost (1 + use_y + ((size - 1 - i) > 0) + ((size - 1 - i) > 256) + (!use_y && ((size - 1 - i) > 256)), 2); + + continue; + } + + if (!bit_field && i + 1 < size && !aopInReg (right->aop, i, A_IDX) && !aopInReg (right->aop, i + 1, A_IDX) && + (aopInReg(right->aop, i, use_y ? X_IDX : Y_IDX) || regDead (use_y ? X_IDX : Y_IDX, ic) && right->aop->regs[use_y ? XL_IDX : YL_IDX] <= i + 1 && right->aop->regs[use_y ? XH_IDX : YH_IDX] <= i + 1)) + { + if (right->aop->type == AOP_LIT) + { + if (cache_l != byteOfVal (right->aop->aopu.aop_lit, i) || cache_h != byteOfVal (right->aop->aopu.aop_lit, i + 1)) + { + genMove_o (use_y ? ASMOP_X : ASMOP_Y, 0, right->aop, i, 2, FALSE, FALSE, FALSE); + cache_l = byteOfVal (right->aop->aopu.aop_lit, i); + cache_h = byteOfVal (right->aop->aopu.aop_lit, i + 1); + } + } + else + genMove_o (use_y ? ASMOP_X : ASMOP_Y, 0, right->aop, i, 2, FALSE, FALSE, FALSE); + + if (!(size - 2 - i)) + emit2 ("ldw", use_y ? 
"(y), x" : "(x), y"); + else + emit2 ("ldw", use_y ? "(0x%x, y), x" : "(0x%x, x), y", size - 2 - i); + cost (1 + use_y + ((size - 2 - i) > 0) + ((size - 2 - i) > 256), 2); + + i++, blen -= 8; + continue; + } + + // todo: handle byte in a first, if dead, so we do not need to save it. + if ((!regDead (A_IDX, ic) && !(aopInReg (right->aop, i, A_IDX) && !bit_field) || right->aop->regs[A_IDX] > i) && !pushed_a) + { + push (ASMOP_A, 0, 1); + pushed_a = TRUE; + } + + if (use_y ? aopInReg (right->aop, i, YL_IDX) || aopInReg (right->aop, i, YH_IDX) : aopInReg (right->aop, i, XL_IDX) || aopInReg (right->aop, i, XH_IDX)) + { + if (!regalloc_dry_run) + wassertl (0, "Overwriting pointer"); + cost (180, 180); + } + + if (bit_field && blen < 8 && right->aop->type == AOP_LIT) // We can save a lot of shifting and masking using the known literal value + { + unsigned char bval = (byteOfVal (right->aop->aopu.aop_lit, i) << bstr) & ((0xff >> (8 - blen)) << bstr); + + if (((~((0xff >> (8 - blen)) << bstr) & 0xff) | bval) == 0xff) + { + if (!i) + { + emit2 ("ld", use_y ? "a, (y)" : "a, (x)"); + cost (1 + use_y, 1); + } + else + { + emit2 ("ld", use_y ? "a, (0x%x, y)" : "a, (0x%x, x)", i); + cost ((size - 1 - i < 256 ? 2 : 3) + use_y, 1); + } + } + else + { + emit2 ("ld", "a, #0x%02x", ~((0xff >> (8 - blen)) << bstr) & 0xff); + cost (2, 1); + if (!i) + { + emit2 ("and", use_y ? "a, (y)" : "a, (x)"); + cost (1 + use_y, 1); + } + else + { + emit2 ("and", use_y ? "a, (0x%x, y)" : "a, (0x%x, x)", i); + cost ((size - 1 - i < 256 ? 2 : 3) + use_y, 1); + } + } + if (bval) + { + emit2 ("or", "a, #0x%02x", bval); + cost (2, 1); + } + goto store; + } + + if (pushed_a && aopInReg (right->aop, i, A_IDX)) + { + emit2 ("ld", "a, (1, sp)"); + cost (2, 1); + } + else + cheapMove (ASMOP_A, 0, right->aop, i, FALSE); + + if (bit_field && blen < 8) + { + if (bstr >= 4) + emit3 (A_SWAP, ASMOP_A, 0); + for (j = (bstr >= 4 ? 
4 : 0); j < bstr; j++) + emit3 (A_SLL, ASMOP_A, 0); + emit2 ("and", "a, #0x%02x", (0xff >> (8 - blen)) << bstr); + cost (2, 1); + push (ASMOP_A, 0, 1); + pushed_a++; + emit2 ("ld", "a, #0x%02x", ~((0xff >> (8 - blen)) << bstr) & 0xff); + cost (2, 1); + if (!i) + { + emit2 ("and", use_y ? "a, (y)" : "a, (x)", i); + cost (1 + use_y, 1); + } + else + { + emit2 ("and", use_y ? "a, (0x%x, y)" : "a, (0x%x, x)", i); + cost ((size - 1 - i < 256 ? 2 : 3) + use_y, 1); + } + emit2 ("or", "a, (1, sp)"); + cost (2, 1); + } + +store: + + if (!(bit_field ? i : size - 1 - i)) + { + emit2 ("ld", use_y ? "(y), a" : "(x), a"); + cost (1 + use_y, 1); + } + else + { + emit2 ("ld", use_y ? "(0x%x, y), a" : "(0x%x, x), a", bit_field ? i : size - 1 - i); + cost ((size - 1 - i < 256 ? 2 : 3) + use_y, 1); + } + } + + while (pushed_a--) + pop (ASMOP_A, 0, 1); + +release: + freeAsmop (right); + freeAsmop (left); +} + +/*-----------------------------------------------------------------*/ +/* genIfx - generate code for Ifx statement */ +/*-----------------------------------------------------------------*/ +static void +genIfx (const iCode *ic) +{ + // todo: This function currently reports code size costs only, other costs will depend on profiler information. + bool inv = FALSE; + operand *const cond = IC_COND (ic); + sym_link *type = operandType (cond); + symbol *const tlbl = (regalloc_dry_run ? 0 : newiTempLabel (NULL)); + symbol *tlbl2 = NULL; + aopOp (cond, ic); + + D (emit2 ("; genIfx", "")); + + if (IS_BOOL (type) && cond->aop->type == AOP_DIR) + { + if (tlbl) + emit2 (IC_FALSE (ic) ? "btjt" : "btjf", "%s, #0, !tlabel", aopGet (cond->aop, 0), labelKey2num (tlbl->key)); + cost (5, 0); + } + else if (aopInReg (cond->aop, 0, C_IDX)) + { + wassertl (IS_BOOL (type), "Variable of type other than _Bool in carry bit."); + if (tlbl) + emit2 (IC_FALSE (ic) ? 
"jrc" : "jrnc", "!tlabel", labelKey2num (tlbl->key)); + cost (2, 0); + } + else if (aopRS (cond->aop) || cond->aop->type == AOP_DIR) + { + int i; + + for (i = 0; i < cond->aop->size;) // todo: Use tnzw; test a first, if dead, to free a; use swapw followed by exg to test xh if xl is dead (same for yh), use tnzw independently of where in the operand xl and xh are. + { + bool floattopbyte = (i == cond->aop->size - 1) && IS_FLOAT(type); + bool floattopword = (i == cond->aop->size - 2) && IS_FLOAT(type); + + if (!floattopword && i + 1 < cond->aop->size && + (aopInReg (cond->aop, i, X_IDX) || aopInReg (cond->aop, i, Y_IDX) || + (cond->aop->type == AOP_REG && (cond->aop->aopu.bytes[i].byteu.reg->rIdx == XH_IDX && cond->aop->aopu.bytes[i + 1].byteu.reg->rIdx == XL_IDX || cond->aop->aopu.bytes[i].byteu.reg->rIdx == YH_IDX && cond->aop->aopu.bytes[i + 1].byteu.reg->rIdx == YL_IDX)))) + { + bool in_y = (aopInReg (cond->aop, i, Y_IDX) || aopInReg (cond->aop, i, YH_IDX) && aopInReg (cond->aop, i + 1, YL_IDX)); + emit3w (A_TNZW, in_y ? 
ASMOP_Y : ASMOP_X, 0); + i += 2; + } + else if (i + 1 < cond->aop->size && regDead (X_IDX, ic) && cond->aop->regs[XL_IDX] < i && cond->aop->regs[XH_IDX] < i && + (aopOnStack (cond->aop, i, 2) || cond->aop->type == AOP_DIR)) + { + genMove_o (ASMOP_X, 0, cond->aop, i, 2, regDead (A_IDX, ic) && cond->aop->regs[A_IDX] < i, TRUE, FALSE); + if (floattopword) + emit3w (A_SLLW, ASMOP_X, 0); + i += 2; + } + else if (i + 1 < cond->aop->size && regDead (Y_IDX, ic) && cond->aop->regs[YL_IDX] < i && cond->aop->regs[YH_IDX] < i && + (aopOnStack (cond->aop, i, 2) || cond->aop->type == AOP_DIR)) + { + genMove_o (ASMOP_Y, 0, cond->aop, i, 2, regDead (A_IDX, ic) && cond->aop->regs[A_IDX] < i, FALSE, TRUE); + if (floattopword) + emit3w (A_SLLW, ASMOP_Y, 0); + i += 2; + } + else if ((aopInReg (cond->aop, i, XL_IDX) || aopInReg (cond->aop, i, XH_IDX) || aopInReg (cond->aop, i, YL_IDX) || aopInReg (cond->aop, i, YH_IDX)) && regDead (A_IDX, ic) && cond->aop->regs[A_IDX] <= i) + { + cheapMove (ASMOP_A, 0, cond->aop, i, FALSE); + emit3 (floattopbyte ? A_SLL : A_TNZ, ASMOP_A, 0); + i++; + } + // We can't just use swap_to_a() to improve the following four cases because it might use rrwa and rlwa which destroy the Z flag. + else if (aopInReg (cond->aop, i, XL_IDX) && (!floattopbyte || regDead (XL_IDX, ic))) + { + emit2 ("exg", "a, xl"); + cost (1, 1); + emit3(floattopbyte ? A_SLL : A_TNZ, ASMOP_A, 0); + emit2 ("exg", "a, xl"); + cost (1, 1); + i++; + } + else if (aopInReg (cond->aop, i, YL_IDX) && (!floattopbyte || regDead (YL_IDX, ic))) + { + emit2 ("exg", "a, yl"); + cost (1, 1); + emit3(floattopbyte ? 
A_SLL : A_TNZ, ASMOP_A, 0); + emit2 ("exg", "a, yl"); + cost (1, 1); + i++; + } + else if (!floattopbyte && !aopInReg (cond->aop, i, XH_IDX) && !aopInReg (cond->aop, i, YH_IDX)) + { + emit3_o (A_TNZ, cond->aop, i, 0, 0); + i++; + } + else if (floattopbyte && aopInReg (cond->aop, i, A_IDX)) + { + emit2 ("and", "a, 0x7f"); + cost (2, 1); + i++; + } + else + { + push (ASMOP_A, 0, 1); + cheapMove (ASMOP_A, 0, cond->aop, i, FALSE); + emit3(floattopbyte ? A_SLL : A_TNZ, ASMOP_A, 0); + pop (ASMOP_A, 0, 1); + i++; + } + + if (!inv && i < cond->aop->size && !IC_FALSE (ic)) + { + tlbl2 = (regalloc_dry_run ? 0 : newiTempLabel (NULL)); + inv = TRUE; + } + + if (tlbl) + emit2 ((!!IC_FALSE (ic) ^ (inv && i != cond->aop->size)) ? "jrne" : "jreq", "!tlabel", labelKey2num ((inv && i == cond->aop->size) ? tlbl2->key : tlbl->key)); + cost (2, 0); + } + } + else if (cond->aop->type == AOP_IMMD) + { + // An AOP_IMMD points to something valid, so it is not a null pointer. Just fall through to the unconditional jump generated below. + } + else + { + if (!regalloc_dry_run) + { + printf ("cond aop type %d, size %d\n", cond->aop->type, cond->aop->size); + wassertl (0, "Unimplemented conditional jump."); + } + cost (180, 180); + } + + if (inv) + { + emitLabel (tlbl); + emitJP (IC_TRUE (ic) ? IC_TRUE (ic) : IC_FALSE (ic), 0.0f); + emitLabel (tlbl2); + } + else + { + emitJP (IC_TRUE (ic) ? 
IC_TRUE (ic) : IC_FALSE (ic), 0.0f); + emitLabel (tlbl); + } + + freeAsmop (cond); +} + +/*-----------------------------------------------------------------*/ +/* genAddrOf - generates code for address of */ +/*-----------------------------------------------------------------*/ +static void +genAddrOf (const iCode *ic) +{ + const symbol *sym; + operand *result, *left, *right; + + D (emit2 ("; genAddrOf", "")); + + result = IC_RESULT (ic); + left = IC_LEFT (ic); + right = IC_RIGHT (ic); + + wassert (result); + wassert (left); + wassert (IS_TRUE_SYMOP (left)); + wassert (right && IS_OP_LITERAL (IC_RIGHT (ic))); + + sym = OP_SYMBOL_CONST (left); + wassert (sym); + + aopOp (result, ic); + + // todo: When optimizing for size, putting on-stack address into y when y is free is cheaper calculating in x, then using exgw. + if (aopInReg (result->aop, 0, Y_IDX) || regDead (Y_IDX, ic) && !regDead (X_IDX, ic)) + { + if (!sym->onStack) + { + wassert (sym->name); + emit2 ("ldw", "y, #%s+%ld", sym->rname, (long)(operandLitValue (right))); + cost (4, 2); + } + else + { + emit2 ("ldw", "y, sp"); + cost (2, 1); + if ((long)(sym->stack) + G.stack.pushed + 1 + (long)(operandLitValue (right)) != 1l) + { + emit2 ("addw", "y, #%ld", (long)(sym->stack) + G.stack.pushed + 1 + (long)(operandLitValue (right))); + cost (4, 2); + } + else + emit3w (A_INCW, ASMOP_Y, 0); + } + genMove (result->aop, ASMOP_Y, regDead (A_IDX, ic), FALSE, regDead (X_IDX, ic)); + } + else if (!(regDead (XH_IDX, ic) ^ regDead (XL_IDX, ic))) + { + if (!regDead (X_IDX, ic)) + push (ASMOP_X, 0, 2); + if (!sym->onStack) + { + wassert (sym->name); + emit2 ("ldw", "x, #%s+%ld", sym->rname, (long)(operandLitValue (right))); + cost (3, 2); + } + else + { + wassert (regalloc_dry_run || sym->stack + G.stack.pushed + 1 + (long)(operandLitValue (right)) > 0); + emit2 ("ldw", "x, sp"); + cost (1, 1); + if ((long)(sym->stack) + G.stack.pushed + 1 + (long)(operandLitValue (right)) > 2l) + { + emit2 ("addw", "x, #%ld", 
(long)(sym->stack) + G.stack.pushed + 1 + (long)(operandLitValue (right))); + cost (3, 2); + } + else + { + emit3w (A_INCW, ASMOP_X, 0); + if ((long)(sym->stack) + G.stack.pushed + 1 + (long)(operandLitValue (right)) > 1l) + emit3w (A_INCW, ASMOP_X, 0); + } + } + genMove (result->aop, ASMOP_X, regDead (A_IDX, ic), TRUE, regDead (Y_IDX, ic)); + if (!regDead (X_IDX, ic)) + pop (ASMOP_X, 0, 2); + } + else // todo: Handle case of y alive and x partially alive; todo: Use mov when destination is a global variable. + { + if (!regalloc_dry_run) + wassertl (0, "Unimplemented genAddrOf deadness."); + cost (180, 180); + } + + freeAsmop (result); +} + +/*-----------------------------------------------------------------*/ +/* genJumpTab - generate code for jump table */ +/*-----------------------------------------------------------------*/ +static void +genJumpTab (const iCode *ic) +{ + symbol *jtab = regalloc_dry_run ? 0 : newiTempLabel (0); + operand *cond; + + D (emit2 ("; genJumpTab", "")); + + wassertl (options.model != MODEL_LARGE, "Jump tables not implemented for large memory model."); + + cond = IC_JTCOND (ic); + + aopOp (cond, ic); + + if (!regDead (X_IDX, ic)) + { + wassertl (regalloc_dry_run, "Need free X for jump table."); + cost (180, 180); + } + + genMove (ASMOP_X, cond->aop, regDead (A_IDX, ic), TRUE, regDead (Y_IDX, ic)); + + emit3w (A_SLLW, ASMOP_X, 0); + + if (!regalloc_dry_run) + { + emit2 ("ldw", "x, (#!tlabel, x)", labelKey2num (jtab->key)); + emit2 ("jp", "(x)"); + } + cost (4, 3); + + emitLabel (jtab); + for (jtab = setFirstItem (IC_JTLABELS (ic)); jtab; jtab = setNextItem (IC_JTLABELS (ic))) + { + if (!regalloc_dry_run) + emit2 (".dw", "#!tlabel", labelKey2num (jtab->key)); + cost (2, 0); + } + + freeAsmop (cond); +} + +/*-----------------------------------------------------------------*/ +/* genCast - generate code for cast */ +/*-----------------------------------------------------------------*/ +static void +genCast (const iCode *ic) +{ + operand 
*result, *right; + int size, offset; + sym_link *resulttype, *righttype; + + D (emit2 ("; genCast", "")); + + result = IC_RESULT (ic); + right = IC_RIGHT (ic); + resulttype = operandType (result); + righttype = operandType (right); + + if ((getSize (resulttype) <= getSize (righttype) || !IS_SPEC (righttype) || (SPEC_USIGN (righttype) || IS_BOOL (righttype))) && + (!IS_BOOL (resulttype) || IS_BOOL (righttype))) + { + genAssign (ic); + return; + } + + aopOp (right, ic); + aopOp (result, ic); + + if (IS_BOOL (resulttype) && right->aop->size == 1 && + (aopInReg (right->aop, 0, A_IDX) || (right->aop->type != AOP_REG && right->aop->type != AOP_REGSTK) || !right->aop->aopu.bytes[0].in_reg)) + { + if (!regDead (A_IDX, ic)) + push (ASMOP_A, 0, 1); + + if (aopInReg(right->aop, 0, A_IDX)) + { + emit3 (A_NEG, ASMOP_A, 0); + emit3 (A_CLR, ASMOP_A, 0); + } + else + { + emit3 (A_CLR, ASMOP_A, 0); + emit3 (A_CP, ASMOP_A, right->aop); + } + emit3 (A_RLC, ASMOP_A, 0); + cheapMove (result->aop, 0, ASMOP_A, 0, FALSE); + + if (!regDead (A_IDX, ic)) + pop (ASMOP_A, 0, 1); + } + else if (IS_BOOL (resulttype) && right->aop->size == 2 && + (aopInReg (right->aop, 0, X_IDX) && regDead (X_IDX, ic) || aopInReg (right->aop, 0, Y_IDX) && regDead (Y_IDX, ic))) + { + if (!regDead (A_IDX, ic)) + push (ASMOP_A, 0, 1); + + emit3w (A_NEGW, right->aop, 0); + cost (aopInReg (right->aop, 0, X_IDX) ? 
1 : 2, 2); + emit3 (A_CLR, ASMOP_A, 0); + emit3 (A_RLC, ASMOP_A, 0); + cheapMove (result->aop, 0, ASMOP_A, 0, FALSE); + + if (!regDead (A_IDX, ic)) + pop (ASMOP_A, 0, 1); + } + else if (IS_BOOL (resulttype)) + { + bool a_need_clear = FALSE; + bool pushed_a = FALSE; + size = right->aop->size; + + for(offset = 1; offset < size; offset++) + if (aopInReg (right->aop, offset, A_IDX)) + { + push (ASMOP_A, 0, 1); + pushed_a = TRUE; + break; + } + + if (!regDead (A_IDX, ic) && !pushed_a) + { + push (ASMOP_A, 0, 1); + pushed_a = TRUE; + } + + for(offset = 0; offset < size; offset++) + { + const asmop *right_stacked = NULL; + int right_offset; + + right_stacked = stack_aop (right->aop, offset, &right_offset); + + if (offset && aopInReg (right->aop, offset, A_IDX)) + { + right_stacked = ASMOP_A; + right_offset = 0; + } + + if (!offset && aopInReg (right->aop, offset, A_IDX)) + { + emit3 (A_NEG, ASMOP_A, 0); + emit3 (A_CLR, ASMOP_A, 0); + } + else if (!right_stacked) + { + emit3 (A_CLR, ASMOP_A, 0); + emit3_o(offset ? A_SBC : A_SUB, ASMOP_A, 0, right->aop, offset); + a_need_clear = TRUE; + } + else + { + emit3 (A_CLR, ASMOP_A, 0); + emit2 (offset ? 
"sbc" : "sub", "a, (%d, sp)", right_offset); + a_need_clear = TRUE; + if (!aopInReg (right->aop, offset, A_IDX)) + pop (right_stacked, 0, 2); + } + } + if (a_need_clear) + emit3 (A_CLR, ASMOP_A, 0); + emit3 (A_RLC, ASMOP_A, 0); + cheapMove (result->aop, 0, ASMOP_A, 0, FALSE); + + if (!regDead (A_IDX, ic)) + pop (ASMOP_A, 0, 1); + else if (pushed_a) + adjustStack (1, FALSE, FALSE, FALSE); + } + else // Cast to signed type + { + bool pushed_a = FALSE; + + genMove_o (result->aop, 0, right->aop, 0, right->aop->size, regDead (A_IDX, ic), regDead (X_IDX, ic), regDead (Y_IDX, ic)); + + size = result->aop->size - right->aop->size; + offset = right->aop->size; + + if (size == 2 && (aopInReg (result->aop, offset, X_IDX) || aopInReg (result->aop, offset, Y_IDX)) && + (aopInReg (result->aop, right->aop->size - 1, XH_IDX) || aopInReg (result->aop, right->aop->size - 1, YH_IDX) || aopInReg (result->aop, right->aop->size - 1, A_IDX) || aopOnStack (result->aop, right->aop->size - 1, 1) || result->aop->type == AOP_DIR)) + { + symbol *tlbl = regalloc_dry_run ? 0 : newiTempLabel (0); + emit3w_o (A_CLRW, result->aop, offset, 0, 0); + + if (aopInReg (result->aop, right->aop->size - 1, XH_IDX)) + emit3w (A_TNZW, ASMOP_X, 0); + else if (aopInReg (result->aop, right->aop->size - 1, YH_IDX)) + emit3w (A_TNZW, ASMOP_Y, 0); + else + emit3_o (A_TNZ, result->aop, right->aop->size - 1, 0, 0); + + if (!regalloc_dry_run) + emit2 ("jrpl", "!tlabel", labelKey2num (tlbl->key)); + cost (2, 2); // 2 for cycle cost is just an estimate; it also ignores pipelining. 
+ emit3w_o (A_DECW, result->aop, offset, 0, 0); + emitLabel (tlbl); + + goto release; + } + + if (result->aop->regs[A_IDX] >= 0 && result->aop->regs[A_IDX] < right->aop->size || !regDead (A_IDX, ic)) + { + push (ASMOP_A, 0, 1); + pushed_a = TRUE; + } + + cheapMove (ASMOP_A, 0, result->aop, right->aop->size - 1, FALSE); + emit3 (A_RLC, ASMOP_A, 0); + + + if (size == 2 && (aopInReg (result->aop, offset, X_IDX) || aopInReg (result->aop, offset, Y_IDX))) // Faster when just setting 16-bit reg. + { + symbol *tlbl = regalloc_dry_run ? 0 : newiTempLabel (0); + emit3w_o (A_CLRW, result->aop, offset, 0, 0); + if (!regalloc_dry_run) + emit2 ("jrnc", "!tlabel", labelKey2num (tlbl->key)); + cost (2, 2); // 2 for cycle cost is just an estimate; it also ignores pipelining. + emit3w_o (A_DECW, result->aop, offset, 0, 0); + emitLabel (tlbl); + + if (pushed_a) + pop (ASMOP_A, 0, 1); + + goto release; + } + + emit3 (A_CLR, ASMOP_A, 0); + emit3 (A_SBC, ASMOP_A, ASMOP_ZERO); + while (size--) + { + if (size && aopInReg (result->aop, offset, A_IDX)) + { + push (ASMOP_A, 0, 1); + pushed_a = TRUE; + } + else + cheapMove (result->aop, offset, ASMOP_A, 0, FALSE); + offset++; + } + + if (pushed_a) + pop (ASMOP_A, 0, 1); + } + +release: + freeAsmop (right); + freeAsmop (result); +} + +/*-----------------------------------------------------------------*/ +/* genDummyRead - generate code for dummy read of volatiles */ +/*-----------------------------------------------------------------*/ +static void +genDummyRead (const iCode *ic) +{ + operand *op; + int i; + + if ((op = IC_RIGHT (ic)) && IS_SYMOP (op)) + { + aopOp (op, ic); + + D (emit2 ("; genDummyRead", "")); + + if (!regDead(A_IDX, ic) && op->aop->type == AOP_DIR) + for (i = 0; i < op->aop->size; i++) + emit3_o (A_TNZ, op->aop, i, 0, 0); + else + { + if (!regDead (A_IDX, ic)) + push (ASMOP_A, 0 ,1); + for (i = 0; i < op->aop->size; i++) + cheapMove (ASMOP_A, 0, op->aop, i, FALSE); + if (!regDead (A_IDX, ic)) + pop (ASMOP_A, 0, 1); + } + + 
freeAsmop (op); + } + + if ((op = IC_LEFT (ic)) && IS_SYMOP (op)) + { + aopOp (op, ic); + + D (emit2 ("; genDummyRead", "")); + + if (!regDead(A_IDX, ic) && op->aop->type == AOP_DIR) + for (i = 0; i < op->aop->size; i++) + emit3_o (A_TNZ, op->aop, i, 0, 0); + else + { + if (!regDead (A_IDX, ic)) + push (ASMOP_A, 0 ,1); + for (i = 0; i < op->aop->size; i++) + cheapMove (ASMOP_A, 0, op->aop, i, FALSE); + if (!regDead (A_IDX, ic)) + pop (ASMOP_A, 0, 1); + } + + freeAsmop (op); + } +} + +/*-----------------------------------------------------------------*/ +/* resultRemat - true if the result of ic is an iTemp flagged */ +/* remat that has no register assigned to any byte; */ +/* always false for SKIP_IC iCodes and for IFX */ +/*-----------------------------------------------------------------*/ +static bool +resultRemat (const iCode *ic) +{ + if (SKIP_IC (ic) || ic->op == IFX) + return 0; + + if (IC_RESULT (ic) && IS_ITEMP (IC_RESULT (ic))) + { + const symbol *sym = OP_SYMBOL_CONST (IC_RESULT (ic)); + + if (!sym->remat) + return(false); + + bool completely_spilt = TRUE; + for (unsigned int i = 0; i < getSize (sym->type); i++) + if (sym->regs[i]) + completely_spilt = FALSE; + + if (completely_spilt) + return(true); + } + + return (false); +} + +/*---------------------------------------------------------------------*/ +/* genSTM8iCode - generate code for STM8 for a single iCode instruction */ +/*---------------------------------------------------------------------*/ +static void +genSTM8iCode (iCode *ic) +{ + genLine.lineElement.ic = ic; + +#if 0 + if (!regalloc_dry_run) + printf ("ic %d op %d stack pushed %d\n", ic->key, ic->op, G.stack.pushed); +#endif + + if (resultRemat (ic)) + { + if (!regalloc_dry_run) + D (emit2 ("; skipping iCode since result will be rematerialized", "")); + return; + } + + if (ic->generated) + { + if (!regalloc_dry_run) + D (emit2 ("; skipping generated iCode", "")); + return; + } + + switch (ic->op) + { + case '!': + genNot (ic); + break; + + case '~': + genCpl (ic); + break; + + case UNARYMINUS: + genUminus (ic); + break; + + case IPUSH: + genIpush (ic); + 
break; + + case IPOP: + wassertl (0, "Unimplemented iCode"); + break; + + case CALL: + case PCALL: + genCall (ic); + break; + + case FUNCTION: + genFunction (ic); + break; + + case ENDFUNCTION: + genEndFunction (ic); + break; + + case RETURN: + genReturn (ic); + break; + + case LABEL: + genLabel (ic); + break; + + case GOTO: + genGoto (ic); + break; + + case '+': + genPlus (ic); + break; + + case '-': + genMinus (ic); + break; + + case '*': + genMult (ic); + break; + + case '/': + case '%': + genDivMod (ic); + break; + + case '>': + case '<': + case LE_OP: + case GE_OP: + genCmp(ic, ifxForOp (IC_RESULT (ic), ic)); + break; + + case NE_OP: + case EQ_OP: + genCmpEQorNE (ic, ifxForOp (IC_RESULT (ic), ic)); + break; + + case AND_OP: + case OR_OP: + wassertl (0, "Unimplemented iCode"); + break; + + case '^': + genXor (ic); + break; + + case '|': + genOr (ic); + break; + + case BITWISEAND: + genAnd (ic, ifxForOp (IC_RESULT (ic), ic)); + break; + + case INLINEASM: + genInline (ic); + break; + + case RRC: + case RLC: + wassertl (0, "Unimplemented iCode"); + break; + + case GETABIT: + genGetABit (ic, ifxForOp (IC_RESULT (ic), ic)); + break; + + case LEFT_OP: + genLeftShift (ic); + break; + + case RIGHT_OP: + genRightShift (ic); + break; + + case GET_VALUE_AT_ADDRESS: + genPointerGet (ic); + break; + + case SET_VALUE_AT_ADDRESS: + genPointerSet (ic); + break; + + case '=': + wassert (!POINTER_SET (ic)); + genAssign (ic); + break; + + case IFX: + genIfx (ic); + break; + + case ADDRESS_OF: + genAddrOf (ic); + break; + + case JUMPTABLE: + genJumpTab (ic); + break; + + case CAST: + genCast (ic); + break; + + case RECEIVE: + case SEND: + wassertl (0, "Unimplemented iCode"); + break; + + case DUMMY_READ_VOLATILE: + genDummyRead (ic); + break; + + case CRITICAL: + genCritical (ic); + break; + + case ENDCRITICAL: + genEndCritical (ic); + break; + + default: + wassertl (0, "Unknown iCode"); + } +} + +float +drySTM8iCode (iCode *ic) +{ + regalloc_dry_run = TRUE; + 
regalloc_dry_run_cost_bytes = 0; + regalloc_dry_run_cost_cycles = 0; + + initGenLineElement (); + + genSTM8iCode (ic); + + destroy_line_list (); + + wassert (regalloc_dry_run); + + const unsigned int byte_cost_weight = 2 << (optimize.codeSize * 3 + !optimize.codeSpeed * 3); + + return ((float)regalloc_dry_run_cost_bytes * byte_cost_weight + (float)regalloc_dry_run_cost_cycles * ic->count); +} + +/*---------------------------------------------------------------------*/ +/* genSTM8Code - generate code for STM8 for a block of instructions */ +/*---------------------------------------------------------------------*/ +void +genSTM8Code (iCode *lic) +{ + iCode *ic; + int clevel = 0; + int cblock = 0; + int cln = 0; + regalloc_dry_run = FALSE; + + /* if debug information is required */ + if (options.debug && currFunc && !regalloc_dry_run) + debugFile->writeFunction (currFunc, lic); + + if (options.debug && !regalloc_dry_run) + debugFile->writeFrameAddress (NULL, NULL, 0); /* have no idea where frame is now */ + + for (ic = lic; ic; ic = ic->next) + { + initGenLineElement (); + + genLine.lineElement.ic = ic; + + if (ic->level != clevel || ic->block != cblock) + { + if (options.debug) + debugFile->writeScope (ic); + clevel = ic->level; + cblock = ic->block; + } + + if (ic->lineno && cln != ic->lineno) + { + if (options.debug) + debugFile->writeCLine (ic); + + if (!options.noCcodeInAsm) + emit2 (";", "%s: %d: %s", ic->filename, ic->lineno, printCLine (ic->filename, ic->lineno)); + cln = ic->lineno; + } + + regalloc_dry_run_cost_bytes = 0; + regalloc_dry_run_cost_cycles = 0; + + if (options.iCodeInAsm) + { + const char *iLine = printILine (ic); + emit2 ("; ic:", "%d: %s", ic->key, iLine); + dbuf_free (iLine); + } +#if 0 + emit2 (";", "count: %f", ic->count); +#endif + genSTM8iCode(ic); + +#if 0 + D (emit2 (";", "Cost for generated ic %d : (%d, %d)", ic->key, regalloc_dry_run_cost_bytes, regalloc_dry_run_cost_cycles)); +#endif + } + + if (options.debug) + 
debugFile->writeFrameAddress (NULL, NULL, 0); /* have no idea where frame is now */ + + /* now we are ready to call the + peephole optimizer */ + if (!options.nopeep) + peepHole (&genLine.lineHead); + + /* now do the actual printing */ + printLine (genLine.lineHead, codeOutBuf); + + /* destroy the line list */ + destroy_line_list (); +} + diff --git a/src/stm8/gen.h b/src/stm8/gen.h new file mode 100644 index 0000000..79f7d63 --- /dev/null +++ b/src/stm8/gen.h @@ -0,0 +1,90 @@ +/*------------------------------------------------------------------------- + gen.h - header file for code generation for STM8 + + Written By - Philipp Krause . pkk@spth.de (2012) + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+-------------------------------------------------------------------------*/ + +#ifndef STM8GEN_H +#define STM8GEN_H 1 + +typedef enum +{ + AOP_INVALID, + /* Is a literal */ + AOP_LIT = 1, + /* Is in a register */ + AOP_REG, + /* Is partially in registers, partially on the stack */ + AOP_REGSTK, + /* Is on the stack */ + AOP_STK, + /* Is a stack location */ + AOP_STL, + /* Is an immediate value */ + AOP_IMMD, + /* Is in direct space */ + AOP_DIR, + /* Read undefined, discard writes */ + AOP_DUMMY, + /* Has been optimized out by jumping directly (see ifxForOp) */ + AOP_CND +} +AOP_TYPE; + +/* asmop_byte: A type for the location a single byte + of an operand can be in */ +typedef struct asmop_byte +{ + bool in_reg; + union + { + reg_info *reg; /* Register this byte is in. */ + long int stk; /* Stack offset for this byte. */ + } byteu; +} asmop_byte; + +/* asmop: A homogenised type for all the different + spaces an operand can be in */ +typedef struct asmop +{ + AOP_TYPE type; + short size; + union + { + value *aop_lit; + struct + { + char *immd; + int immd_off; + }; + int stk_off; + char *aop_dir; + asmop_byte bytes[8]; + } aopu; + signed char regs[6]; // Byte of this aop that is in the register. -1 if no byte of this aop is in the reg. +} +asmop; + +void genSTM8Code (iCode *); +void stm8_emitDebuggerSymbol (const char *); + +extern bool stm8_assignment_optimal; +extern long int stm8_call_stack_size; +extern bool stm8_extend_stack; + +#endif + diff --git a/src/stm8/main.c b/src/stm8/main.c new file mode 100644 index 0000000..49731a0 --- /dev/null +++ b/src/stm8/main.c @@ -0,0 +1,556 @@ +/*------------------------------------------------------------------------- + main.c - STM8 specific definitions. 
+ + Philipp Klaus Krause <pkk@spth.de> 2012-2013 + Valentin Dudouyt <valentin.dudouyt@gmail.com> 2013 + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + In other words, you are welcome to use, share and improve this program. + You are forbidden to forbid anyone else to use, share and improve + what you give them. Help stamp out software-hoarding! +-------------------------------------------------------------------------*/ + +#include "common.h" + +#include "ralloc.h" +#include "gen.h" +#include "dbuf_string.h" +#include "peep.h" +#include "SDCCgas.h" + +#define OPTION_MEDIUM_MODEL "--model-medium" +#define OPTION_LARGE_MODEL "--model-large" +#define OPTION_CODE_SEG "--codeseg" +#define OPTION_CONST_SEG "--constseg" +#define OPTION_ELF "--out-fmt-elf" + +extern DEBUGFILE dwarf2DebugFile; +extern int dwarf2FinalizeFile(FILE *); + +static OPTION stm8_options[] = { + {0, OPTION_MEDIUM_MODEL, NULL, "16-bit address space for both data and code (default)"}, + {0, OPTION_LARGE_MODEL, NULL, "16-bit address space for data, 24-bit for code"}, + {0, OPTION_CODE_SEG, &options.code_seg, "<name> use this name for the code segment", CLAT_STRING}, + {0, OPTION_CONST_SEG, &options.const_seg, "<name> use this name for the const segment", CLAT_STRING}, + {0, OPTION_ELF, NULL, "Output executable in ELF format (deprecated)"}, + {0} +}; + +enum +{ + P_CODESEG = 1, + 
P_CONSTSEG, +}; + +static int +stm8_do_pragma (int id, const char *name, const char *cp) +{ + struct pragma_token_s token; + int processed = 1, error = 0; + + init_pragma_token (&token); + + switch (id) + { + case P_CODESEG: + case P_CONSTSEG: + { + char *segname; + + cp = get_pragma_token (cp, &token); + if (token.type == TOKEN_EOL) + { + error = 1; + break; + } + else + segname = Safe_strdup (get_pragma_string (&token)); + + cp = get_pragma_token (cp, &token); + if (token.type != TOKEN_EOL) + { + Safe_free (segname); + error = 1; + break; + } + else + { + if (id == P_CODESEG) + { + if (options.code_seg) + Safe_free (options.code_seg); + options.code_seg = segname; + } + else + { + if (options.const_seg) + Safe_free (options.const_seg); + options.const_seg = segname; + } + } + } + break; + default: + processed = 0; + break; + } + + if (error) + werror (W_BAD_PRAGMA_ARGUMENTS, name); + + free_pragma_token (&token); + return processed; +} + +static struct pragma_s stm8_pragma_tbl[] = { + {"codeseg", P_CODESEG, 0, stm8_do_pragma}, + {"constseg", P_CONSTSEG, 0, stm8_do_pragma}, + {NULL, 0, 0, NULL}, +}; + +static int +stm8_process_pragma (const char *s) +{ + return process_pragma_tbl (stm8_pragma_tbl, s); +} + +static char stm8_defaultRules[] = { +#include "peeph.rul" + "" +}; + + +static char *stm8_keywords[] = { + "at", + "critical", + "interrupt", + "trap", + "naked", + NULL +}; + +static void +stm8_genAssemblerEnd (FILE *of) +{ + if (options.out_fmt == 'E' && options.debug) + { + dwarf2FinalizeFile (of); + } +} + +static void +stm8_init (void) +{ + asm_addTree (&asm_asxxxx_mapping); +} + + +static void +stm8_reset_regparm (struct sym_link *funcType) +{ +} + +static int +stm8_reg_parm (sym_link * l, bool reentrant) +{ + return FALSE; +} + +static bool +stm8_parseOptions (int *pargc, char **argv, int *i) +{ + const char *const arg = argv[*i]; + + if (!strcmp (arg, OPTION_ELF)) + { + options.out_fmt = 'E'; + debugFile = &dwarf2DebugFile; + return TRUE; + } + + return 
FALSE; +} + +static void +stm8_finaliseOptions (void) +{ + port->mem.default_local_map = data; + port->mem.default_globl_map = data; + + if (options.model == MODEL_LARGE) + { + port->s.funcptr_size = 3; + port->stack.call_overhead = 3; + port->jumptableCost.maxCount = 0; + } +} + +static void +stm8_setDefaultOptions (void) +{ + options.nopeep = 0; + options.stackAuto = 1; + options.intlong_rent = 1; + options.float_rent = 1; + options.noRegParams = 0; + options.data_loc = 0x0001; /* We can't use the byte at address zero in C, since NULL pointers have special meaning */ + options.code_loc = 0x8000; + + options.stack_loc = -1; /* Do not set the stack pointer in software- just use the device-specific reset value. */ + + options.out_fmt = 'i'; /* Default output format is ihx */ +} + +static const char * +stm8_getRegName (const struct reg_info *reg) +{ + if (reg) + return reg->name; + return "err"; +} + +static void +stm8_genExtraArea (FILE *of, bool hasMain) +{ + fprintf (of, "\n; default segment ordering for linker\n"); + tfprintf (of, "\t!area\n", HOME_NAME); + tfprintf (of, "\t!area\n", STATIC_NAME); + tfprintf (of, "\t!area\n", port->mem.post_static_name); + tfprintf (of, "\t!area\n", CONST_NAME); + tfprintf (of, "\t!area\n", "INITIALIZER"); + tfprintf (of, "\t!area\n", CODE_NAME); + fprintf (of, "\n"); +} + +static void +stm8_genInitStartup (FILE *of) +{ + fprintf (of, "__sdcc_gs_init_startup:\n"); + + if (options.stack_loc >= 0) + { + fprintf (of, "\tldw\tx, #0x%04x\n", options.stack_loc); + fprintf (of, "\tldw\tsp, x\n"); + } + + /* Init static & global variables */ + fprintf (of, "__sdcc_init_data:\n"); + fprintf (of, "; stm8_genXINIT() start\n"); + + /* Zeroing memory (required by standard for static & global variables) */ + fprintf (of, "\tldw x, #l_DATA\n"); + fprintf (of, "\tjreq\t00002$\n"); + fprintf (of, "00001$:\n"); + fprintf (of, "\tclr (s_DATA - 1, x)\n"); + fprintf (of, "\tdecw x\n"); + fprintf (of, "\tjrne\t00001$\n"); + fprintf (of, "00002$:\n"); 
+ + /* Copy l_INITIALIZER bytes from s_INITIALIZER to s_INITIALIZED */ + fprintf (of, "\tldw\tx, #l_INITIALIZER\n"); + fprintf (of, "\tjreq\t00004$\n"); + fprintf (of, "00003$:\n"); + fprintf (of, "\tld\ta, (s_INITIALIZER - 1, x)\n"); + fprintf (of, "\tld\t(s_INITIALIZED - 1, x), a\n"); + fprintf (of, "\tdecw\tx\n"); + fprintf (of, "\tjrne\t00003$\n"); + fprintf (of, "00004$:\n"); + fprintf (of, "; stm8_genXINIT() end\n"); +} + +#define STM8_INTERRUPTS_COUNT 30 + +int +stm8_genIVT(struct dbuf_s * oBuf, symbol ** intTable, int intCount) +{ + int i; + dbuf_tprintf (oBuf, "\tint s_GSINIT ; reset\n"); + + if(intCount > STM8_INTERRUPTS_COUNT) + { + werror(E_INT_BAD_INTNO, intCount - 1); + intCount = STM8_INTERRUPTS_COUNT; + } + + if (interrupts[INTNO_TRAP] || intCount) + dbuf_printf (oBuf, "\tint %s ; trap\n", interrupts[INTNO_TRAP] ? interrupts[INTNO_TRAP]->rname : "0x000000"); + + for (i = 0; i < intCount; i++) + dbuf_printf (oBuf, "\tint %s ; int%d\n", interrupts[i] ? interrupts[i]->rname : "0x000000", i); + + return TRUE; +} + +/*----------------------------------------------------------------------*/ +/* stm8_dwarfRegNum - return the DWARF register number for a register. */ +/*----------------------------------------------------------------------*/ +static int +stm8_dwarfRegNum (const struct reg_info *reg) +{ + return reg->rIdx; +} + +static bool +_hasNativeMulFor (iCode *ic, sym_link *left, sym_link *right) +{ + int result_size = IS_SYMOP (IC_RESULT (ic)) ? 
getSize (OP_SYM_TYPE (IC_RESULT(ic))) : 4; + sym_link *test = NULL; + + if (IS_LITERAL (left)) + test = left; + else if (IS_LITERAL (right)) + test = right; + + switch (ic->op) + { + case '/': + case '%': + return (getSize (left) <= 2 && IS_UNSIGNED (left) && getSize (right) <= 2 && IS_UNSIGNED (right)); + case '*': + { + if (result_size == 1 || getSize (left) <= 1 && getSize (right) <= 1 && result_size == 2 && IS_UNSIGNED (left) && IS_UNSIGNED (right)) + return TRUE; + + if ((getSize (left) != 2 || getSize (right) != 2) || result_size != 2 || !test) + return FALSE; + + unsigned long long add, sub; + int topbit, nonzero; + + + if (floatFromVal (valFromType (test)) < 0 || csdOfVal (&topbit, &nonzero, &add, &sub, valFromType (test))) + return FALSE; + + int shifts = topbit; + + // If the leading digits of the cse are 1 0 -1 we can use 0 1 1 instead to reduce the number of shifts. + if (topbit >= 2 && (add & (1ull << topbit)) && (sub & (1ull << (topbit - 2)))) + shifts--; + + wassert (nonzero); + + // Shifts are 1 byte, additions and subtractions are 3 bytes. + if (shifts + 3 * (nonzero - 1) <= 9 - optimize.codeSize + 3 * optimize.codeSpeed) + return TRUE; + + return FALSE; + } + default: + return FALSE; + } +} + +/* Indicate which extended bit operations this port supports */ +static bool +hasExtBitOp (int op, int size) +{ + return (op == GETABIT); +} + +static const char * +get_model (void) +{ + switch (options.model) + { + case MODEL_MEDIUM: + return ("stm8"); + break; + case MODEL_LARGE: + return ("stm8-large"); + break; + default: + werror (W_UNKNOWN_MODEL, __FILE__, __LINE__); + return "unknown"; + } +} + +/** $1 is always the basename. + $2 is always the output file. + $3 varies + $l is the list of extra options that should be there somewhere... + MUST be terminated with a NULL. +*/ +static const char *_linkCmd[] = +{ + "sdldstm8", "-nf", "\"$1\"", NULL +}; + +/* $3 is replaced by assembler.debug_opts resp. 
port->assembler.plain_opts */ +static const char *stm8AsmCmd[] = +{ + "sdasstm8", "$l", "$3", "\"$1.asm\"", NULL +}; + +static const char *const _libs_stm8[] = { "stm8", NULL, }; + +PORT stm8_port = +{ + TARGET_ID_STM8, + "stm8", + "STM8", /* Target name */ + NULL, /* Processor name */ + { + glue, + TRUE, /* We want stm8_genIVT to be triggered */ + MODEL_MEDIUM | MODEL_LARGE, + MODEL_MEDIUM, + &get_model, /* model string used as library destination */ + gas_glue + }, + { /* Assembler */ + stm8AsmCmd, + NULL, + "-plosgffwy", /* Options with debug */ + "-plosgffw", /* Options without debug */ + 0, + ".asm" + }, + { /* Linker */ + _linkCmd, + NULL, //LINKCMD, + NULL, + ".rel", + 1, + NULL, /* crt */ + _libs_stm8, /* libs */ + }, + { /* Peephole optimizer */ + stm8_defaultRules, + stm8instructionSize, + NULL, + NULL, + NULL, + stm8notUsed, + stm8canAssign, + stm8notUsedFrom, + NULL, + }, + /* Sizes: char, short, int, long, long long, ptr, fptr, gptr, bit, float, max */ + { + 1, /* char */ + 2, /* short */ + 2, /* int */ + 4, /* long */ + 8, /* long long */ + 2, /* near ptr */ + 2, /* far ptr */ + 2, /* generic ptr */ + 2, /* func ptr */ + 0, /* banked func ptr */ + 1, /* bit */ + 4, /* float */ + }, + /* tags for generic pointers */ + { 0x00, 0x40, 0x60, 0x80 }, /* far, near, xstack, code */ + { + "XSEG", + "STACK", + "CODE", /* code */ + "DATA", /* data */ + NULL, /* idata */ + NULL, /* pdata */ + NULL, /* xdata */ + NULL, /* bit */ + "RSEG (ABS)", /* reg */ + "GSINIT", /* static initialization */ + NULL, /* overlay */ + "GSFINAL", /* gsfinal */ + "HOME", /* home */ + NULL, /* xidata */ + NULL, /* xinit */ + "CONST", /* const_name */ + "CABS (ABS)", /* cabs_name */ + "DABS (ABS)", /* xabs_name */ + NULL, /* iabs_name */ + "INITIALIZED", /* name of segment for initialized variables */ + "INITIALIZER", /* name of segment for copies of initialized variables in code space */ + NULL, + NULL, + 1, /* CODE is read-only */ + 1 /* No fancy alignments supported. 
*/ + }, + { stm8_genExtraArea, NULL }, + { /* stack information */ + -1, /* direction */ + 0, + 7, /* isr overhead */ + 2, /* call overhead */ + 0, + 2, + 1, /* sp points to next free stack location */ + }, + { -1, TRUE }, + { stm8_emitDebuggerSymbol, + { + stm8_dwarfRegNum, + 0, /* cfiSame */ + 0, /* cfiUndef */ + 4, /* addressSize */ + 9, /* regNumRet */ + SP_IDX, /* regNumSP */ + 0, /* regNumBP */ + 2, /* offsetSP */ + }, + }, + { + 32767, /* maxCount */ + 2, /* sizeofElement */ + {4, 5, 5}, /* sizeofMatchJump[] - assuming operand in reg, inverse can be optimized away - would be much higher otherwise */ + {4, 5, 5}, /* sizeofRangeCompare[] - same as above */ + 3, /* sizeofSubtract - assuming 2 byte index, would be 2 otherwise */ + 5, /* sizeofDispatch - 1 byte for sllw followed by 3 bytes for ldw x, (..., X) and 2 byte for jp (x) */ + }, + "_", + stm8_init, + stm8_parseOptions, + stm8_options, + NULL, + stm8_finaliseOptions, + stm8_setDefaultOptions, + stm8_assignRegisters, + stm8_getRegName, + 0, + NULL, + stm8_keywords, + NULL, + stm8_genAssemblerEnd, + stm8_genIVT, + 0, /* no genXINIT code */ + stm8_genInitStartup, /* genInitStartup */ + stm8_reset_regparm, + stm8_reg_parm, + stm8_process_pragma, /* process_pragma */ + NULL, /* getMangledFunctionName */ + _hasNativeMulFor, /* hasNativeMulFor */ + hasExtBitOp, /* hasExtBitOp */ + NULL, /* oclsExpense */ + TRUE, + FALSE, /* little endian */ + 0, /* leave lt */ + 0, /* leave gt */ + 1, /* transform <= to ! > */ + 1, /* transform >= to ! < */ + 1, /* transform != to !(a == b) */ + 0, /* leave == */ + FALSE, /* Array initializer support. 
*/ + 0, /* no CSE cost estimation yet */ + NULL, /* builtin functions */ + GPOINTER, /* treat unqualified pointers as "generic" pointers */ + 1, /* reset labelKey to 1 */ + 1, /* globals & local statics allowed */ + 5, /* Number of registers handled in the tree-decomposition-based register allocator in SDCCralloc.hpp */ + PORT_MAGIC +}; diff --git a/src/stm8/peep.c b/src/stm8/peep.c new file mode 100644 index 0000000..238b7c1 --- /dev/null +++ b/src/stm8/peep.c @@ -0,0 +1,1236 @@ +#include "common.h" +#include "SDCCicode.h" +#include "SDCCglobl.h" +#include "SDCCgen.h" + +#include "peep.h" + +#define NOTUSEDERROR() do {werror(E_INTERNAL_ERROR, __FILE__, __LINE__, "error in notUsed()");} while(0) + +// #define D(_s) { printf _s; fflush(stdout); } +#define D(_s) + +#define EQUALS(l, i) (!STRCASECMP((l), (i))) +#define ISINST(l, i) (!STRNCASECMP((l), (i), sizeof(i) - 1) && (!(l)[sizeof(i) - 1] || isspace((unsigned char)((l)[sizeof(i) - 1])))) +#define STARTSINST(l, i) (!STRNCASECMP((l), (i), sizeof(i) - 1)) + +typedef enum +{ + S4O_CONDJMP, + S4O_WR_OP, + S4O_RD_OP, + S4O_TERM, + S4O_VISITED, + S4O_ABORT, + S4O_CONTINUE +} S4O_RET; + +static struct +{ + lineNode *head; +} _G; + +/*----------------------------------------------------------------------------*/ +/* strNextCharBlock - Returns the next block of chars (after spaces, comma) */ +/* Leading spaces and Current block are skipped and search stops at next block*/ +/* Valid block separators are: ' ' and ',' */ +/* If no block is found (EOS or ';'), returns NULL */ +/*----------------------------------------------------------------------------*/ +static char * +strNextCharBlock(const char *str) +{ + if (!str || !str[0]) + return 0; + + while (isblank ((unsigned char)(str[0]))) + str++; // skip leading blanks + + while (str[0] && !isblank ((unsigned char)(str[0])) && str[0] != ';') + { + if (str[0] == ',') + { + str++; // current block is finished with ',' + break; + } + str++; // next char of current block + } + + 
while (isblank ((unsigned char)(str[0]))) + str++; // skip trailing blanks + + if (str[0] && str[0] != ';') + return (char *)str; + return 0; +} + +/* isInt - true if str, after any leading '#', '(', '[' and whitespace, starts with a hex (0x...) or decimal integer */ +static bool +isInt(const char *str) +{ + unsigned int hex; + int dec; + while(str[0] == '#' || str[0] == '(' || str[0] == '[' || isspace ((unsigned char)str[0])) + str++; + /* sscanf returns EOF (non-zero) on empty input, so require exactly one conversion; report true for any hex value, including 0x0 */ + if(sscanf(str, "0x%x", &hex) == 1) + return(true); + return(sscanf(str, "%d", &dec) == 1); +} + +/* readint - parse the integer recognized by isInt(); asserts and yields -1 when there is none */ +static int +readint(const char *str) +{ + unsigned int hex; + int ret; + while(str[0] == '#' || str[0] == '(' || str[0] == '[' || isspace ((unsigned char)str[0])) + str++; + /* %x stores through an unsigned int pointer; only trust the value after a successful conversion */ + if(sscanf(str, "0x%x", &hex) == 1) + return((int)hex); + if(sscanf(str, "%d", &ret) != 1) + { + wassertl (0, "readint() got non-integer argument:"); + fprintf (stderr, "%s\n", str); + ret = -1; + } + return(ret); +} + +static int +isReg(const char *what) +{ + if(what[0] == '(') + what++; + if(what[0] == 'a' || what[0] == 'x' || what[0] == 'y') + return(true); + if(!strcmp(what, "sp")) + return(true); + return(false); +} + +/* nextToken - strtok-like tokenizer on a private copy of p; pass NULL to continue with the previous string */ +static char * +nextToken(char *p) +{ + /* strtok replacement */ + static char *str, *ret, *end; + // Use an internal buffer to prevent *p from being modified + static char buf[128]; + if(p) { + strncpy(buf, p, sizeof(buf)); + buf[sizeof(buf)-1] = '\0'; + str = buf; + /* measure the copy, not p: p may have been truncated to fit buf */ + end = buf + strlen(buf); + } + if(str >= end) + return(NULL); + ret = str; + // Strip separators + while(*str == ',' || isspace((unsigned char)*str)) + str++; + if(*str == '(') + { + // Take an expression in brackets + while(*str && *str != ')') + str++; + /* step over ')' only when present, so an unterminated bracket cannot move str past the terminator */ + if(*str) + str++; + } else { + // Take until EOL or separator + while(*str && *str != ',' && !isspace((unsigned char)*str)) + str++; + } + *str = '\0'; + str++; + return(ret); +} + +static bool +isRelativeAddr(const char *what, const char *mode) +{ + char buf[4]; + strcpy(buf, mode); + strcat(buf, ")"); + return(what[0] == '(' && strstr(what, buf)); +} + +static bool +isLabel(const char *what) +{ + const char *end; + + end = strchr(what, '+'); + if(!end) + end = what + strlen(what); + if(what[0] == '(' && !strchr(what, ',')) + 
/*
 * isPtr - tell whether an operand uses pointer (indirect) syntax:
 * either "[...]" or a parenthesised operand whose first inner character
 * is again '[' or '(' (e.g. "([0x10], x)").
 */
static bool
isPtr(const char *what)
{
  const char first = what[0];

  if (first == '[')
    return true;

  if (first != '(')
    return false;

  /* Only looked at once we know what[0] is not the terminator. */
  return what[1] == '[' || what[1] == '(';
}
not eos + if(line[0]) + line++; + } + } + return (regNumber == 2); +} + +int +stm8instructionSize(lineNode *pl) +{ // this function is quite rough, it makes all indirect addressing cases to the longest. + char *operand; + char *op1start; + char *op2start; + + operand = nextToken(pl->line); + op1start = nextToken(NULL); + op2start = nextToken(NULL); + + while(op1start && isspace((unsigned char)op1start[0])) op1start++; + while(op2start && isspace((unsigned char)op2start[0])) op2start++; + //printf("line=%s operand=%s op1start=%s op2start=%s\n", pl->line, operand, op1start, op2start); + + /* Operations that always costs 1 byte */ + if (ISINST(operand, "ccf") + || ISINST(operand, "divw") + || ISINST(operand, "exgw") + || ISINST(operand, "iret") + || ISINST(operand, "nop") + || ISINST(operand, "rcf") + || ISINST(operand, "ret") + || ISINST(operand, "retf") + || ISINST(operand, "rvf") + || ISINST(operand, "break") + || ISINST(operand, "halt") + || ISINST(operand, "rim") + || ISINST(operand, "trap") + || ISINST(operand, "wfi") + || ISINST(operand, "sim") + || ISINST(operand, "scf")) + return 1; + + /* Operations that always costs 3 byte */ + if(ISINST(operand, "jrh") + || ISINST(operand, "jrnh") + || ISINST(operand, "jril") + || ISINST(operand, "jrih") + || ISINST(operand, "jrm") + || ISINST(operand, "jrnm")) + return 3; + + /* Operations that always costs 2 byte */ + if(STARTSINST(operand, "jr") + || ISINST(operand, "callr") + || ISINST(operand, "wfe")) + return 2; + + /* Operations that always costs 4 byte */ + if(ISINST(operand, "bccm") + || ISINST(operand, "bcpl") + || ISINST(operand, "bres") + || ISINST(operand, "bset") + || ISINST(operand, "callf") + || ISINST(operand, "int") + || ISINST(operand, "jpf")) + return 4; + + /* Operations that always costs 5 byte */ + if(ISINST(operand, "btjf") + || ISINST(operand, "btjt")) + return 5; + + if (EQUALS(operand, "push") + || EQUALS(operand, "pop")) + { + wassert (op1start); + if (!strcmp(op1start, "a")) + return 1; + if 
(!strcmp(op1start, "cc")) + return 1; + if (isImmediate(op1start)) // immediate + return 2; + else // longmem + return 3; + } + + /* arity=1 */ + if(EQUALS(operand, "clr") + || EQUALS(operand, "dec") + || EQUALS(operand, "inc") + || EQUALS(operand, "swap") + || EQUALS(operand, "jp") + || EQUALS(operand, "call") + || EQUALS(operand, "cpl") + || EQUALS(operand, "neg") + || EQUALS(operand, "sll") + || EQUALS(operand, "sla") + || EQUALS(operand, "srl") + || EQUALS(operand, "sra") + || EQUALS(operand, "rlc") + || EQUALS(operand, "rrc") + || EQUALS(operand, "tnz")) + { + int i = 0; + + wassert (op1start); + if(!strcmp(op1start, "a") || !strcmp(op1start, "(x)")) + return(1); + if(!strcmp(op1start, "(y)")) + return(2); + if(op1start[0] == '('|| op1start[0] == '[') + op1start++; + if(strstr(op1start, ",y)")) + i++; // costs extra byte for operating with y + if ((ISINST(operand, "jp") || ISINST(operand, "call")) && *op1start != '(' && *op1start != '[') // jp and call are 3 bytes for direct long addressing mode. 
+ return(3); + if(isLabel(op1start)) + return(4); + if(readint(op1start) <= 0xFF) + return(2+i); + /* op1 > 0xFF */ + if((ISINST(operand, "jp") || ISINST(operand, "call")) && !strchr(op1start, 'y')) + return(3); + return(4); + } + + if(EQUALS(operand, "exg")) + { + assert (!strcmp(op1start, "a") && op2start != NULL); + if(isReg(op2start)) + return(1); + else + return(3); + } + + if(EQUALS(operand, "addw") || EQUALS(operand, "subw")) + { + assert (op1start != NULL); + if(!strcmp(op1start, "sp")) + return(2); + if(isImmediate(op2start) && op1start[0] == 'y') + return(4); + if(isImmediate(op2start) && op1start[0] == 'x') + return(3); + if(isSpIndexed(op2start)) + return(3); + return(4); + } + + if(ISINST(operand, "cplw")) + { + assert (op1start != NULL); + if(op1start[0] == 'y') + return(2); + else + return(1); + } + + if(ISINST(operand, "ldf")) + { + assert (op1start != NULL); + if(isRelativeAddr(op1start, "y") || isRelativeAddr(op2start, "y")) + return(5); + else + return(4); + } + + /* Operations that costs 2 or 3 bytes for immediate */ + if(STARTSINST(operand, "ld") + || !strncmp(operand, "cp", 2) + || EQUALS(operand, "adc") + || EQUALS(operand, "add") + || EQUALS(operand, "and") + || EQUALS(operand, "bcp") + || EQUALS(operand, "or") + || EQUALS(operand, "sbc") + || EQUALS(operand, "sub") + || EQUALS(operand, "xor")) + { + int i = 0; + char suffix; + wassert (op1start && op2start); + suffix = operand[strlen(operand)-1]; + if(suffix == 'w' && isImmediate(op2start)) + { + i++; // costs extra byte + if(!strcmp(op1start, "y")) + i++; + } + if(isImmediate(op2start)) + return(2+i); // ld reg, #immd + if(isSpIndexed(op1start) || isSpIndexed(op2start)) + return(2); + if(!strcmp(op1start, "(x)") || !strcmp(op2start, "(x)")) + return(1); + if(!strcmp(op1start, "(y)") || !strcmp(op2start, "(y)")) + return(2); + if(isShortoff(op1start, "x") || isShortoff(op2start, "x")) + return(2); + if(isShortoff(op1start, "y") || isShortoff(op2start, "y")) + return(3); + 
if(isLongoff(op1start, "x") || isLongoff(op2start, "x")) + return(3); + if(isLongoff(op1start, "y") || isLongoff(op2start, "y")) + return(4); + if(isPtr(op1start) || isPtr(op2start)) + return(4); + if(strchr(op1start, 'y') || strchr(op2start, 'y')) + i++; // costs extra byte for operating with y + if(isLabel(op1start) || isLabel(op2start)) + return(3+i); + if(isReg(op1start) && isReg(op2start)) + { + if (!strncmp(op1start, "x", 1) && (!strncmp(op2start, "y", 1) || !strncmp(op2start, "sp", 2)) + || !strncmp(op1start, "sp", 2) && !strncmp(op2start, "x", 1)) + return(1); + return(1+i); + } + if(!strcmp(op2start, "a")) + return(3); + if(readint(op2start) <= 0xFF) + return(2+i); + else + return(3+i); + return 4; + } + + /* mov costs 3, 4 or 5 bytes depending on its addressing mode */ + if(ISINST(operand, "mov")) { + assert (op1start != NULL && op2start != NULL); + if(isImmediate(op2start)) + return(4); + if(isLabel(op2start)) + return(5); + if(readint(op2start) <= 0xFF) + return(3); + if(readint(op2start) > 0xFF) + return(5); + } + + /* Operations that costs 2 or 1 bytes depending on + is the Y or X register used */ + if(EQUALS(operand, "clrw") + || EQUALS(operand, "decw") + || EQUALS(operand, "div") + || EQUALS(operand, "incw") + || EQUALS(operand, "mul") + || EQUALS(operand, "negw") + || EQUALS(operand, "popw") + || EQUALS(operand, "pushw") + || EQUALS(operand, "rlcw") + || EQUALS(operand, "rlwa") + || EQUALS(operand, "rrcw") + || EQUALS(operand, "rrwa") + || EQUALS(operand, "sllw") + || EQUALS(operand, "slaw") + || EQUALS(operand, "sraw") + || EQUALS(operand, "srlw") + || EQUALS(operand, "swapw") + || EQUALS(operand, "tnzw")) + { + assert (op1start != NULL); + if((op1start && !strcmp(op1start, "y")) || (op2start && !strcmp(op2start, "y"))) + return(2); + else + return(1); + } + + if(ISINST(pl->line, ".db") || ISINST(pl->line, ".byte")) + { + int i, j; + for(i = 1, j = 0; pl->line[j]; i += pl->line[j] == ',', j++); + return(i); + } + + if(ISINST(pl->line, ".dw") || 
ISINST(pl->line, ".word")) + { + int i, j; + for(i = 1, j = 0; pl->line[j]; i += pl->line[j] == ',', j++); + return(i * 2); + } + + return(5); // Maximum instruction size, e.g. btjt. +} + +/*-----------------------------------------------------------------*/ +/* incLabelJmpToCount - increment counter "jmpToCount" in entry */ +/* of the list labelHash */ +/*-----------------------------------------------------------------*/ +static bool +incLabelJmpToCount (const char *label) +{ + labelHashEntry *entry; + + entry = getLabelRef (label, _G.head); + if (!entry) + return FALSE; + entry->jmpToCount++; + return TRUE; +} + +/*-----------------------------------------------------------------*/ +/* findLabel - */ +/* 1. extracts label in the opcode pl */ +/* 2. increment "label jump-to count" in labelHash */ +/* 3. search lineNode with label definition and return it */ +/*-----------------------------------------------------------------*/ +static lineNode * +findLabel (const lineNode *pl) +{ + char *p; + lineNode *cpl; + + /* 1. extract label in opcode */ + + /* In each jump the label is at the end */ + p = strlen (pl->line) - 1 + pl->line; + + /* Skip trailing whitespace */ + while(isspace(*p)) + p--; + + /* scan backward until space or ',' */ + for (; p > pl->line; p--) + if (isspace(*p) || *p == ',') + break; + + /* sanity check */ + if (p == pl->line) + { + NOTUSEDERROR(); + return NULL; + } + + /* skip ',' resp. '\t' */ + ++p; + + /* 2. increment "label jump-to count" */ + if (!incLabelJmpToCount (p)) + return NULL; + + /* 3. search lineNode with label definition and return it */ + for (cpl = _G.head; cpl; cpl = cpl->next) + { + if ( cpl->isLabel + && strncmp (p, cpl->line, strlen(p)) == 0) + { + return cpl; + } + } + return NULL; +} + +/* Check if reading arg implies reading what. 
/*
 * argCont - check if reading the operand 'arg' implies reading register
 * 'what'.
 *
 * arg  : operand text, optionally preceded by blanks and one ','
 *        (callers pass the result of strchr (line, ',')); may be NULL.
 * what : register letter, one of 'a', 'x' or 'y'; anything else yields
 *        false.
 *
 * Immediates ('#...') never read a register.  For an operand of the form
 * "(0x..." the hexadecimal prefix is skipped so that its 'x' is not
 * mistaken for the x register.  The remaining text is searched for the
 * register letter, which can give false positives (e.g. a letter inside
 * a symbol name) but no false negatives.
 */
static bool argCont(const char *arg, char what)
{
  if (arg == NULL || arg[0] == '\0' || !(what == 'a' || what == 'x' || what == 'y'))
    return false;

  while (isblank ((unsigned char)(arg[0])))
    arg++;

  if (arg[0] == ',')
    arg++;

  while (isblank ((unsigned char)(arg[0])))
    arg++;

  if (arg[0] == '#')
    return false;

  /* <ctype.h> functions require an unsigned char value; a plain char
     may be negative. */
  if (arg[0] == '(' && arg[1] == '0' && tolower ((unsigned char)(arg[2])) == 'x')
    arg += 3; // Skip hex prefix to avoid false x positive.

  if (arg[0] == '\0')
    return false;

  if (arg[0] == '_' && what == 'a') // The STM8 has no a-relative addressing modes.
    return false;

  return (strchr(arg, what) != NULL);
}
+ sym_lnk = sym->type; + while (sym_lnk && !IS_PTR (sym_lnk)) + sym_lnk = sym_lnk->next; + if(IS_PTR(sym_lnk)) + size = 2; + } + else + { + NOTUSEDERROR(); + return TRUE; + } + + switch(*what) + { + case 'a': + return(size == 1); + case 'x': + return(size > 1); + case 'y': + return(size > 2); + default: + return FALSE; + } +} + +static bool +stm8MightReadFlag(const lineNode *pl, const char *what) +{ + if (strcmp (what, "c") && strcmp (what, "n") && strcmp (what, "z")) + return true; + + if (ISINST (pl->line, "push")) + return (pl->line[5] == 'c'); + + if (!strcmp (what, "n")) + return (ISINST (pl->line, "jrmi") || ISINST (pl->line, "jrpl") || ISINST (pl->line, "jrsge") || ISINST (pl->line, "jrsgte") || ISINST (pl->line, "jrsle") || ISINST (pl->line, "jrslt")); + + if (!strcmp (what, "z")) + return (ISINST (pl->line, "jreq") || ISINST (pl->line, "jrne") || ISINST (pl->line, "jrsgte") || ISINST (pl->line, "jrsle")); + + if (!strcmp (what, "c")) + return (ISINST (pl->line, "jrc") || ISINST (pl->line, "jrnc") || ISINST (pl->line, "jruge") || ISINST (pl->line, "jrugt") || ISINST (pl->line, "jrule") || ISINST (pl->line, "jrult") || + ISINST (pl->line, "adc") || ISINST (pl->line, "sbc") || + ISINST (pl->line, "ccf") || ISINST (pl->line, "rlc") || ISINST (pl->line, "rlcw") || ISINST (pl->line, "rrc") || ISINST (pl->line, "rrcw")); + + return true; +} + +static bool +stm8MightRead(const lineNode *pl, const char *what) +{ + char extra = 0; + + if (!strcmp (what, "xl") || !strcmp (what, "xh")) + extra = 'x'; + else if (!strcmp (what, "yl") || !strcmp (what, "yh")) + extra = 'y'; + else if (strcmp (what, "a") != 0) + return stm8MightReadFlag(pl, what); + + if (!extra) + { + if (ISINST (pl->line, "adc") + || ISINST (pl->line, "and") + || ISINST (pl->line, "bcp") + || ISINST (pl->line, "cp") + || ISINST (pl->line, "div") + || ISINST (pl->line, "mul") + || ISINST (pl->line, "or") + || ISINST (pl->line, "rlwa") + || ISINST (pl->line, "rrwa") + || ISINST (pl->line, "sbc") + || 
ISINST (pl->line, "trap") + || ISINST (pl->line, "xor")) + return TRUE; + + if ((ISINST (pl->line, "add") + || ISINST (pl->line, "cpl") + || ISINST (pl->line, "dec") + || ISINST (pl->line, "exg") + || ISINST (pl->line, "inc") + || ISINST (pl->line, "neg") + || ISINST (pl->line, "rlc") + || ISINST (pl->line, "rrc") + || ISINST (pl->line, "sll") + || ISINST (pl->line, "sla") + || ISINST (pl->line, "sra") + || ISINST (pl->line, "srl") + || ISINST (pl->line, "sub") + || ISINST (pl->line, "tnz")) && + pl->line[4] == 'a') + return TRUE; + + if ((ISINST (pl->line, "push") + || ISINST (pl->line, "swap")) && + pl->line[5] == 'a') + return TRUE; + + if ((ISINST (pl->line, "ld") || ISINST (pl->line, "ldf")) && argCont (strchr (pl->line, ','), 'a')) + return TRUE; + } + else + { + if (ISINST (pl->line, "divw") || ISINST (pl->line, "exgw") || ISINST (pl->line, "trap")) + return TRUE; + + if (ISINST (pl->line, "exg") && strstr (strchr(pl->line, ','), what)) + return true; + + if ((ISINST (pl->line, "div") || ISINST (pl->line, "mul")) && pl->line[4] == extra) + return true; + + if ((ISINST (pl->line, "addw") + || ISINST (pl->line, "cplw") + || ISINST (pl->line, "decw") + || ISINST (pl->line, "incw") + || ISINST (pl->line, "negw") + || ISINST (pl->line, "rlcw") + || ISINST (pl->line, "rlwa") + || ISINST (pl->line, "rrcw") + || ISINST (pl->line, "rrwa") + || ISINST (pl->line, "sllw") + || ISINST (pl->line, "slaw") + || ISINST (pl->line, "sraw") + || ISINST (pl->line, "srlw") + || ISINST (pl->line, "subw") + || ISINST (pl->line, "tnzw")) && + pl->line[5] == extra) + return TRUE; + + if ((ISINST (pl->line, "pushw") + || ISINST (pl->line, "swapw")) && pl->line[6] == extra) + return TRUE; + + if (ISINST (pl->line, "cpw") && pl->line[4] == extra) + return TRUE; + + if ((strchr (pl->line, ',') ? 
argCont (strchr (pl->line, ','), extra) : argCont (strchr (pl->line, '('), extra)) && + (ISINST (pl->line, "adc") + || ISINST (pl->line, "add") + || ISINST (pl->line, "and") + || ISINST (pl->line, "bcp") + || ISINST (pl->line, "call") + || ISINST (pl->line, "clr") + || ISINST (pl->line, "cp") + || ISINST (pl->line, "cpl") + || ISINST (pl->line, "dec") + || ISINST (pl->line, "inc") + || ISINST (pl->line, "jp") + || ISINST (pl->line, "neg") + || ISINST (pl->line, "or") + || ISINST (pl->line, "rlc") + || ISINST (pl->line, "rrc") + || ISINST (pl->line, "sbc") + || ISINST (pl->line, "sll") + || ISINST (pl->line, "sla") + || ISINST (pl->line, "sra") + || ISINST (pl->line, "srl") + || ISINST (pl->line, "sub") + || ISINST (pl->line, "swap") + || ISINST (pl->line, "tnz") + || ISINST (pl->line, "cpw") + || ISINST (pl->line, "ldf") + || ISINST (pl->line, "ldw") + || ISINST (pl->line, "ld") + || ISINST (pl->line, "xor"))) + return TRUE; + + if (ISINST (pl->line, "ld") || ISINST (pl->line, "ldw")) + { + char buf[64], *p; + strcpy (buf, pl->line); + if (!!(p = strstr (buf, "0x")) || !!(p = strstr (buf, "0X"))) + p[0] = p[1] = ' '; + if (!!(p = strchr (buf, '(')) && !!strchr (p, extra)) + return TRUE; + } + } + + if(ISINST(pl->line, "ret") || ISINST(pl->line, "retf")) + return(isReturned(what)); + + return FALSE; +} + +static bool +stm8UncondJump(const lineNode *pl) +{ + return (ISINST(pl->line, "jp") || ISINST(pl->line, "jra") || ISINST(pl->line, "jrt") || ISINST(pl->line, "jpf")); +} + +static bool +stm8CondJump(const lineNode *pl) +{ + return (!stm8UncondJump(pl) && STARTSINST(pl->line, "jr") || + ISINST(pl->line, "btjt") || ISINST(pl->line, "btjf")); +} + +static bool +stm8SurelyWritesFlag(const lineNode *pl, const char *what) +{ + if (!strcmp (what, "n") || !strcmp (what, "z")) + { + if (ISINST (pl->line, "addw") && !strcmp (pl->line + 5, "sp")) + return false; + if (ISINST (pl->line, "sub") && !strcmp (pl->line + 4, "sp")) + return false; + if (ISINST (pl->line, "ld")) + 
return !stm8InstIsRegToReg(pl->line, false); + if (ISINST (pl->line, "ldw")) + return !stm8InstIsRegToReg(pl->line, true); + if (ISINST (pl->line, "pop")) + return (pl->line[5] == 'c'); + if (ISINST (pl->line, "bccm") || ISINST (pl->line, "bcpl") || + ISINST (pl->line, "break") || + ISINST (pl->line, "bres") || ISINST (pl->line, "bset") || + ISINST (pl->line, "btjf") || ISINST (pl->line, "btjt") || + ISINST (pl->line, "call") || ISINST (pl->line, "callf") || ISINST (pl->line, "callr") || + ISINST (pl->line, "ccf") || + ISINST (pl->line, "exg") || ISINST (pl->line, "exgw") || + ISINST (pl->line, "halt") || ISINST (pl->line, "int") || + STARTSINST (pl->line, "jp") || + STARTSINST (pl->line, "jr") || + ISINST (pl->line, "mov") || ISINST (pl->line, "mul") || + ISINST (pl->line, "nop") || + ISINST (pl->line, "popw") || ISINST (pl->line, "push") || ISINST (pl->line, "pushw") || + ISINST (pl->line, "rcf") || + ISINST (pl->line, "ret") || ISINST (pl->line, "retf") || + ISINST (pl->line, "rvf") || ISINST (pl->line, "scf") || + ISINST (pl->line, "sim") || ISINST (pl->line, "trap") || ISINST (pl->line, "wfe") || ISINST (pl->line, "wfi")) + return false; + return true; + } + else if (!strcmp (what, "c")) + { + if (ISINST (pl->line, "addw") && !strcmp (pl->line + 5, "sp")) + return false; + if (ISINST (pl->line, "sub") && !strcmp (pl->line + 4, "sp")) + return false; + + if (ISINST (pl->line, "adc") || + STARTSINST (pl->line, "add") || // add, addw + STARTSINST (pl->line, "btj") || // btjt, btjf + ISINST (pl->line, "ccf") || + STARTSINST (pl->line, "cp") || // cp, cpw, cpl, cplw + STARTSINST (pl->line, "div") || // div, divw + STARTSINST (pl->line, "neg") || // neg, negw + ISINST (pl->line, "rcf") || + STARTSINST (pl->line, "rlc") || // rlc, rlcw + STARTSINST (pl->line, "rrc") || // rrc, rrcw + ISINST (pl->line, "sbc") || + ISINST (pl->line, "scf") || + STARTSINST (pl->line, "sl") || // sll, sla, sllw, slaw + STARTSINST (pl->line, "sr") || // sra, sraw, srl, srlw + STARTSINST 
(pl->line, "sub")) // sub, subw + return true; + } + + return false; +} + +static bool +stm8SurelyWrites(const lineNode *pl, const char *what) +{ + char extra = 0; + if (!strcmp (what, "xl") || !strcmp (what, "xh")) + extra = 'x'; + else if (!strcmp (what, "yl") || !strcmp (what, "yh")) + extra = 'y'; + else if (strcmp (what, "a")) + return (stm8SurelyWritesFlag (pl, what)); + + if (!extra) + { + if (ISINST (pl->line, "adc") + || ISINST (pl->line, "and") + || ISINST (pl->line, "div") + || ISINST (pl->line, "iret") + || ISINST (pl->line, "or") + || ISINST (pl->line, "rlwa") + || ISINST (pl->line, "rrwa") + || ISINST (pl->line, "sbc") + || ISINST (pl->line, "xor")) + return TRUE; + + if ((ISINST (pl->line, "add") + || ISINST (pl->line, "clr") + || ISINST (pl->line, "cpl") + || ISINST (pl->line, "dec") + || ISINST (pl->line, "exg") + || ISINST (pl->line, "inc") + || ISINST (pl->line, "neg") + || ISINST (pl->line, "pop") + || ISINST (pl->line, "rlc") + || ISINST (pl->line, "rrc") + || ISINST (pl->line, "sll") + || ISINST (pl->line, "sla") + || ISINST (pl->line, "sra") + || ISINST (pl->line, "srl") + || ISINST (pl->line, "ldf") + || ISINST (pl->line, "sub")) && + pl->line[4] == 'a') + return TRUE; + + if (ISINST (pl->line, "swap") && pl->line[5] == 'a') + return TRUE; + + if (ISINST (pl->line, "ld") && pl->line[3] == 'a') + return TRUE; + } + else + { + if (ISINST (pl->line, "divw") + || ISINST (pl->line, "exgw") + || ISINST (pl->line, "iret")) + return TRUE; + + if ((ISINST (pl->line, "div") + || ISINST (pl->line, "ldw") + || ISINST (pl->line, "mul")) + && pl->line[4] == extra) + return TRUE; + + if ((ISINST (pl->line, "addw") + || ISINST (pl->line, "clrw") + || ISINST (pl->line, "cplw") + || ISINST (pl->line, "decw") + || ISINST (pl->line, "incw") + || ISINST (pl->line, "negw") + || ISINST (pl->line, "popw") + || ISINST (pl->line, "rlcw") + || ISINST (pl->line, "rlwa") + || ISINST (pl->line, "rrcw") + || ISINST (pl->line, "rrwa") + || ISINST (pl->line, "sllw") + || 
ISINST (pl->line, "slaw") + || ISINST (pl->line, "sraw") + || ISINST (pl->line, "srlw") + || ISINST (pl->line, "subw")) && + pl->line[5] == extra) + return TRUE; + + if (ISINST (pl->line, "swapw") && pl->line[6] == extra) + return TRUE; + + if (ISINST (pl->line, "ld") + && strncmp (pl->line + 3, what, strlen (what)) == 0) + return TRUE; + + if (ISINST (pl->line, "exg") && strstr (strstr (pl->line, ","), what)) + return true; + } + + return false; +} + +static bool +stm8SurelyReturns(const lineNode *pl) +{ + return(ISINST(pl->line, "ret") || ISINST(pl->line, "retf")); +} + +/*-----------------------------------------------------------------*/ +/* scan4op - "executes" and examines the assembler opcodes, */ +/* follows conditional and un-conditional jumps. */ +/* Moreover it registers all passed labels. */ +/* */ +/* Parameter: */ +/* lineNode **pl */ +/* scanning starts from pl; */ +/* pl also returns the last scanned line */ +/* const char *pReg */ +/* points to a register (e.g. "ar0"). scan4op() tests for */ +/* read or write operations with this register */ +/* const char *untilOp */ +/* points to NULL or a opcode (e.g. "push"). */ +/* scan4op() returns if it hits this opcode. */ +/* lineNode **plCond */ +/* If a conditional branch is met plCond points to the */ +/* lineNode of the conditional branch */ +/* */ +/* Returns: */ +/* S4O_ABORT */ +/* on error */ +/* S4O_VISITED */ +/* hit lineNode with "visited" flag set: scan4op() already */ +/* scanned this opcode. */ +/* S4O_FOUNDOPCODE */ +/* found opcode and operand, to which untilOp and pReg are */ +/* pointing to. */ +/* S4O_RD_OP, S4O_WR_OP */ +/* hit an opcode reading or writing from pReg */ +/* S4O_CONDJMP */ +/* hit a conditional jump opcode. pl and plCond return the */ +/* two possible branches. */ +/* S4O_TERM */ +/* acall, lcall, ret and reti "terminate" a scan. 
/*
 * scan4op - walk the instruction list starting at *pl and report the
 * first event that is relevant for the liveness of 'what'.
 *
 * Parameters:
 *   pl      in/out: scanning starts at *pl; on return *pl is the last
 *           line looked at.  After an unconditional jump *pl is set to
 *           the line defining the jump target.
 *   what    register or flag name, passed on to stm8MightRead() and
 *           stm8SurelyWrites().
 *   untilOp not used by this implementation.
 *   plCond  out: set to the target line of a conditional jump when
 *           S4O_CONDJMP is returned.
 *
 * Lines without text, debug lines, comments and label definitions are
 * skipped.  Every other line is marked as visited before it is examined.
 *
 * Returns:
 *   S4O_ABORT    inline assembler was hit, a jump target could not be
 *                resolved, or the end of the list was reached
 *   S4O_VISITED  the line had already been visited by an earlier scan
 *   S4O_RD_OP    the line might read 'what'
 *   S4O_WR_OP    the line surely writes 'what'
 *   S4O_CONDJMP  a conditional jump was hit; *pl is the jump itself,
 *                *plCond is the jump target
 *   S4O_TERM     a return instruction was hit
 */
static S4O_RET
scan4op (lineNode **pl, const char *what, const char *untilOp,
         lineNode **plCond)
{
  for (; *pl; *pl = (*pl)->next)
    {
      if (!(*pl)->line || (*pl)->isDebug || (*pl)->isComment || (*pl)->isLabel)
        continue;
      D(("Scanning %s for %s\n", (*pl)->line, what));
      /* don't optimize across inline assembler,
         e.g. isLabel doesn't work there */
      if ((*pl)->isInline)
        {
          D(("S4O_ABORT at inline asm\n"));
          return S4O_ABORT;
        }

      /* Reaching an already scanned line means this path has merged
         into one that is handled elsewhere (e.g. a loop). */
      if ((*pl)->visited)
        {
          D(("S4O_VISITED\n"));
          return S4O_VISITED;
        }

      (*pl)->visited = TRUE;

      /* Reads are tested before writes, so a line that does both is
         reported as a read. */
      if(stm8MightRead(*pl, what))
        {
          D(("S4O_RD_OP\n"));
          return S4O_RD_OP;
        }

      // Check writes before conditional jumps, some jumps (btjf, btjt) write 'c'
      if(stm8SurelyWrites(*pl, what))
        {
          D(("S4O_WR_OP\n"));
          return S4O_WR_OP;
        }

      /* Follow an unconditional jump: continue at the target label; the
         loop increment then moves on to the line after that label. */
      if(stm8UncondJump(*pl))
        {
          *pl = findLabel (*pl);
            if (!*pl)
              {
                D(("S4O_ABORT at unconditional jump\n"));
                return S4O_ABORT;
              }
        }
      /* A conditional jump has two successors: the caller continues
         after *pl and has to scan *plCond as well. */
      if(stm8CondJump(*pl))
        {
          *plCond = findLabel (*pl);
          if (!*plCond)
            {
              D(("S4O_ABORT at conditional jump\n"));
              return S4O_ABORT;
            }
          D(("S4O_CONDJMP\n"));
          return S4O_CONDJMP;
        }

      /* No need to look at return-value registers here: stm8MightRead()
         was already asked about this line above. */
      if(stm8SurelyReturns(*pl))
        {
          D(("S4O_TERM\n"));
          return S4O_TERM;
        }
    }
  D(("S4O_ABORT\n"));
  return S4O_ABORT;
}
This small wrapper handles: */ +/* - action required on different return values */ +/* - recursion in case of conditional branches */ +/*-----------------------------------------------------------------*/ +static bool +doTermScan (lineNode **pl, const char *what) +{ + lineNode *plConditional; + for (;; *pl = (*pl)->next) + { + switch (scan4op (pl, what, NULL, &plConditional)) + { + case S4O_TERM: + case S4O_VISITED: + case S4O_WR_OP: + /* all these are terminating conditions */ + return true; + case S4O_CONDJMP: + /* two possible destinations: recurse */ + { + lineNode *pl2 = plConditional; + D(("CONDJMP trying other branch first\n")); + if (!doTermScan (&pl2, what)) + return false; + D(("Other branch OK.\n")); + } + continue; + case S4O_RD_OP: + default: + /* no go */ + return false; + } + } +} + +/*-----------------------------------------------------------------*/ +/* univisitLines - clear "visited" flag in all lines */ +/*-----------------------------------------------------------------*/ +static void +unvisitLines (lineNode *pl) +{ + for (; pl; pl = pl->next) + pl->visited = false; +} + +bool +stm8notUsed (const char *what, lineNode *endPl, lineNode *head) +{ + lineNode *pl; + if(strcmp(what, "x") == 0) + return(stm8notUsed("xl", endPl, head) && stm8notUsed("xh", endPl, head)); + else if(strcmp(what, "y") == 0) + return(stm8notUsed("yl", endPl, head) && stm8notUsed("yh", endPl, head)); + + _G.head = head; + + unvisitLines (_G.head); + + pl = endPl->next; + return (doTermScan (&pl, what)); +} + +bool +stm8notUsedFrom (const char *what, const char *label, lineNode *head) +{ + lineNode *cpl; + + for (cpl = head; cpl; cpl = cpl->next) + if (cpl->isLabel && !strncmp (label, cpl->line, strlen(label))) + return (stm8notUsed (what, cpl, head)); + + return FALSE; +} + +/* can be directly assigned with ld */ +bool +stm8canAssign (const char *op1, const char *op2, const char *exotic) +{ + //fprintf(stderr, "op1=%s op2=%s exotic=%s\n", op1, op2, exotic); + const char 
*reg, *payload; + reg = op1[0] == 'a' ? op1 : op2; + payload = reg == op1 ? op2 : op1; + if(isRelativeAddr(payload, "x") + || isRelativeAddr(payload, "y") + || isRelativeAddr(payload, "sp") + || !strcmp(payload, "(x)") + || !strcmp(payload, "(y)") + || !strcmp(payload, "xl") + || !strcmp(payload, "xh")) + return(reg[0] == 'a'); + return(FALSE); +} diff --git a/src/stm8/peep.h b/src/stm8/peep.h new file mode 100644 index 0000000..1360d14 --- /dev/null +++ b/src/stm8/peep.h @@ -0,0 +1,5 @@ +bool stm8notUsed(const char *what, lineNode *endPl, lineNode *head); +bool stm8notUsedFrom(const char *what, const char *label, lineNode *head); +bool stm8canAssign (const char *dst, const char *src, const char *exotic); +int stm8instructionSize(lineNode *node); + diff --git a/src/stm8/peeph.def b/src/stm8/peeph.def new file mode 100644 index 0000000..f204389 --- /dev/null +++ b/src/stm8/peeph.def @@ -0,0 +1,1592 @@ +// peeph.def - STM8 peephole rules + +replace restart { + ld %1, %2 +} by { + ; peephole 0 removed dead load into %1 from %2. +} if notVolatile(%1), notVolatile(%2), notUsed(%1), notUsed('n'), notUsed('z') + +replace restart { + ldw a, (%2, sp) +} by { + ; peephole 0a removed dead load into a from %2. +} if notUsed('n'), notUsed('z') + +replace restart { + ldw %1, %2 +} by { + ; peephole 0w removed dead load into %1 from %2. +} if notVolatile(%1), notVolatile(%2), notUsed(%1), notUsed('n'), notUsed('z') + +replace restart { + ldw %1, (%2, sp) +} by { + ; peephole 0wa removed dead load into %1 from %2. +} if notUsed(%1), notUsed('n'), notUsed('z') + +replace restart { + clr %1 +} by { + ; peephole 1 removed dead clear of %1. +} if notVolatile(%1), notUsed(%1), notUsed('n'), notUsed('z') + +replace restart { + or a, %1 +} by { + ; peephole 2 removed dead or. +} if notVolatile(%1), notUsed('a'), notUsed('n'), notUsed('z') + +replace restart { + clrw %1 +} by { + ; peephole 3 removed dead clrw of %1. 
// Rule 4c: a word stored to (%1, sp) does not need to be reloaded after a
// byte operation on a. A push or pop in between moves sp, so (%1, sp)
// then names a different stack slot and the reload must stay; this is
// the same guard that rule 12 uses.
replace restart {
	ldw	(%1, sp), %2
	ld	a, (%2)
	%3	a
	ldw	%2, (%1, sp)
} by {
	ldw	(%1, sp), %2
	ld	a, (%2)
	%3	a
	; peephole 4c removed redundant load from (%1, sp) into %2.
} if notSame(%3 'push' 'pop')
// Rule 14b: store through x itself instead of copying x to y first.
replace restart {
	ldw	y, x
	ld	a, %1
	ld	(%2, y), a
} by {
	; peephole 14b used x directly instead of going through y.
	ld	a, %1
	ld	(%2, x), a
} if notUsed('y')
+} if notUsed('a'), operandsLiteral(%1), immdInRange(0 65535 '+' 0 %1 %2) + +replace restart { + ld a, %1 + or a, #0x40 + ld %1, a +} by { + bset %2, #6 + ; peephole 18-6 replaced or by bset. +} if notUsed('a'), operandsLiteral(%1), immdInRange(0 65535 '+' 0 %1 %2) + +replace restart { + ld a, %1 + or a, #0x80 + ld %1, a +} by { + bset %2, #7 + ; peephole 18-7 replaced or by bset. +} if notUsed('a'), operandsLiteral(%1), immdInRange(0 65535 '+' 0 %1 %2) + +replace restart { + ld a, %1 + and a, #0xfe + ld %1, a +} by { + bres %2, #0 + ; peephole 19-0 replaced and by bres. +} if notUsed('a'), operandsLiteral(%1), immdInRange(0 65535 '+' 0 %1 %2) + +replace restart { + ld a, %1 + and a, #0xfd + ld %1, a +} by { + bres %2, #1 + ; peephole 19-1 replaced and by bres. +} if notUsed('a'), operandsLiteral(%1), immdInRange(0 65535 '+' 0 %1 %2) + +replace restart { + ld a, %1 + and a, #0xfb + ld %1, a +} by { + bres %2, #2 + ; peephole 19-2 replaced and by bres. +} if notUsed('a'), operandsLiteral(%1), immdInRange(0 65535 '+' 0 %1 %2) + +replace restart { + ld a, %1 + and a, #0xf7 + ld %1, a +} by { + bres %2, #3 + ; peephole 19-3 replaced and by bres. +} if notUsed('a'), operandsLiteral(%1), immdInRange(0 65535 '+' 0 %1 %2) + +replace restart { + ld a, %1 + and a, #0xef + ld %1, a +} by { + bres %2, #4 + ; peephole 19-4 replaced and by bres. +} if notUsed('a'), operandsLiteral(%1), immdInRange(0 65535 '+' 0 %1 %2) + +replace restart { + ld a, %1 + and a, #0xdf + ld %1, a +} by { + bres %2, #5 + ; peephole 19-5 replaced and by bres. +} if notUsed('a'), operandsLiteral(%1), immdInRange(0 65535 '+' 0 %1 %2) + +replace restart { + ld a, %1 + and a, #0xbf + ld %1, a +} by { + bres %2, #6 + ; peephole 19-6 replaced and by bres. +} if notUsed('a'), operandsLiteral(%1), immdInRange(0 65535 '+' 0 %1 %2) + +replace restart { + ld a, %1 + and a, #0x7f + ld %1, a +} by { + bres %2, #7 + ; peephole 19-7 replaced and by bres. 
+} if notUsed('a'), operandsLiteral(%1), immdInRange(0 65535 '+' 0 %1 %2) + +replace restart { + ldw %1, #%2 + ld a, (%1) + xor a, #0x01 + ld (%1), a +} by { + bcpl %2, #0 + ; peephole 20-0 replaced or by bcpl. +} if notUsed(%1), notUsed('a') + +replace restart { + ldw %1, #%2 + ld a, (%1) + xor a, #0x02 + ld (%1), a +} by { + bcpl %2, #1 + ; peephole 20-1 replaced or by bcpl. +} if notUsed(%1), notUsed('a') + +replace restart { + ldw %1, #%2 + ld a, (%1) + xor a, #0x04 + ld (%1), a +} by { + bcpl %2, #2 + ; peephole 20-2 replaced or by bcpl. +} if notUsed(%1), notUsed('a') + +replace restart { + ldw %1, #%2 + ld a, (%1) + xor a, #0x08 + ld (%1), a +} by { + bcpl %2, #3 + ; peephole 20-3 replaced or by bcpl. +} if notUsed(%1), notUsed('a') + +replace restart { + ldw %1, #%2 + ld a, (%1) + xor a, #0x10 + ld (%1), a +} by { + bcpl %2, #4 + ; peephole 20-4 replaced or by bcpl. +} if notUsed(%1), notUsed('a') + +replace restart { + ldw %1, #%2 + ld a, (%1) + xor a, #0x20 + ld (%1), a +} by { + bcpl %2, #5 + ; peephole 20-5 replaced or by bcpl. +} if notUsed(%1), notUsed('a') + +replace restart { + ldw %1, #%2 + ld a, (%1) + xor a, #0x40 + ld (%1), a +} by { + bcpl %2, #6 + ; peephole 20-6 replaced or by bcpl. +} if notUsed(%1), notUsed('a') + +replace restart { + ldw %1, #%2 + ld a, (%1) + xor a, #0x80 + ld (%1), a +} by { + bcpl %2, #7 + ; peephole 20-7 replaced or by bcpl. +} if notUsed(%1), notUsed('a') + +replace restart { + ld a, %1 + xor a, #0x01 + ld %1, a +} by { + bcpl %2, #0 + ; peephole 21-0 replaced or by bcpl. +} if notUsed('a'), operandsLiteral(%1), immdInRange(0 65535 '+' 0 %1 %2) + +replace restart { + ld a, %1 + xor a, #0x02 + ld %1, a +} by { + bcpl %2, #1 + ; peephole 21-1 replaced or by bcpl. +} if notUsed('a'), operandsLiteral(%1), immdInRange(0 65535 '+' 0 %1 %2) + +replace restart { + ld a, %1 + xor a, #0x04 + ld %1, a +} by { + bcpl %2, #2 + ; peephole 21-2 replaced or by bcpl. 
+} if notUsed('a'), operandsLiteral(%1), immdInRange(0 65535 '+' 0 %1 %2) + +replace restart { + ld a, %1 + xor a, #0x08 + ld %1, a +} by { + bcpl %2, #3 + ; peephole 21-3 replaced or by bcpl. +} if notUsed('a'), operandsLiteral(%1), immdInRange(0 65535 '+' 0 %1 %2) + +replace restart { + ld a, %1 + xor a, #0x10 + ld %1, a +} by { + bcpl %2, #4 + ; peephole 21-4 replaced or by bcpl. +} if notUsed('a'), operandsLiteral(%1), immdInRange(0 65535 '+' 0 %1, %2) + +replace restart { + ld a, %1 + xor a, #0x20 + ld %1, a +} by { + bcpl %2, #5 + ; peephole 21-5 replaced or by bcpl. +} if notUsed('a'), operandsLiteral(%1), immdInRange(0 65535 '+' 0 %1 %2) + +replace restart { + ld a, %1 + xor a, #0x40 + ld %1, a +} by { + bcpl %2, #6 + ; peephole 21-6 replaced or by bcpl. +} if notUsed('a'), operandsLiteral(%1), immdInRange(0 65535 '+' 0 %1 %2) + +replace restart { + ld a, %1 + xor a, #0x80 + ld %1, a +} by { + bcpl %2, #7 + ; peephole 21-7 replaced or by bcpl. +} if notUsed('a'), operandsLiteral(%1), immdInRange(0 65535 '+' 0 %1 %2) + +replace restart { + clr a + and a, %1 +} by { + clr a + ; peephole 22 removed redundant and. +} if notVolatile(%1) + +replace restart { + clr a + ; peephole 22a removed redundant clr a. + or a, %1 +} by { + ld a, %1 +} + +replace restart { + clr a + or a, (%1, %2) +} by { + ; peephole 22b removed redundant clr a. + ld a, (%1, %2) +} + +replace restart { + tnzw %1 + jrne %2 + clrw %1 +} by { + tnzw %1 + jrne %2 + ; peephole 22c removed redundant clrw %1. +} + +replace restart { + ldw %1, (%3, %4) + jrne %2 + clrw %1 +} by { + ldw %1, (%3, %4) + jrne %2 + ; peephole 22d removed redundant clrw %1. +} + +replace restart { + and a, %1 + tnz a +} by { + and a, %1 + ; peephole 23 removed redundant tnz. +} + +replace restart { + or a, %1 + tnz a +} by { + or a, %1 + ; peephole 24 removed redundant tnz. +} + +replace restart { + xor a, %1 + tnz a +} by { + xor a, %1 + ; peephole 25 removed redundant tnz. 
+} + +replace restart { + add a, %1 + tnz a +} by { + add a, %1 + ; peephole 26 removed redundant tnz. +} + +replace restart { + adc a, %1 + tnz a +} by { + adc a, %1 + ; peephole 27 removed redundant tnz. +} + +replace restart { + sub a, %1 + tnz a +} by { + sub a, %1 + ; peephole 28 removed redundant tnz. +} + +replace restart { + sbc a, %1 + tnz a +} by { + sbc a, %1 + ; peephole 29 removed redundant tnz. +} + +replace restart { + ld a, %1 + tnz a +} by { + ld a, %1 + ; peephole 30 removed redundant tnz. +} if notSame(%1 'xl' 'xh' 'yl' 'yh') + +replace restart { + ldw %1, (%1) + tnzw %1 +} by { + ldw %1, (%1) + ; peephole 30bw removed redundant tnzw. +} + +replace restart { + ld a, (%1, %2) + tnz a +} by { + ld a, (%1, %2) + ; peephole 31 removed redundant tnz. +} + +replace restart { + ldw %1, (%2, %3) + tnzw %1 +} by { + ldw %1, (%2, %3) + ; peephole 31w removed redundant tnzw. +} + +replace restart { + ld (%1, %2), a + tnz (%1, %2) +} by { + ld (%1, %2), a + ; peephole 31a removed redundant tnz. +} + +replace restart { + ld (%1, %2), a + tnz a +} by { + ld (%1, %2), a + ; peephole 31b removed redundant tnz. +} + +replace restart { + rlc a + tnz a +} by { + rlc a + ; peephole 32 removed redundant tnz. +} + +replace restart { + addw sp, #%1 + addw sp, #%2 +} by { + addw sp, #%9 + ; peephole 33 combined additions to sp. +} if immdInRange(0 255 '+' %1 %2 %9) + +replace restart { + pop a + addw sp, #%2 +} by { + addw sp, #%9 + ; peephole 34 merged pop a into addw. +} if notUsed('a'), immdInRange(0 255 '+' 1 %2 %9) + +replace restart { + addw sp, #%2 + pop a +} by { + addw sp, #%9 + ; peephole 35 merged pop a into addw. +} if notUsed('a'), immdInRange(0 255 '+' 1 %2 %9) + +replace restart { + popw x + addw sp, #%2 +} by { + addw sp, #%9 + ; peephole 36 merged popw x into addw. +} if notUsed('x'), immdInRange(0 255 '+' 2 %2 %9) + +replace restart { + addw sp, #%2 + popw x +} by { + addw sp, #%9 + ; peephole 37 merged popw x into addw. 
+} if notUsed('x'), immdInRange(0 255 '+' 2 %2 %9) + +replace restart { + pop a + pop a +} by { + popw x + ; peephole 38 merged pop a into popw x +} if notUsed('a'), notUsed('x') + +replace restart { + pop a + popw x +} by { + addw sp, #3 + ; peephole 39 merged popw x into addw. +} if notUsed('a'), notUsed('x') + +replace restart { + popw x + pop a +} by { + addw sp, #3 + ; peephole 40 merged popw x into addw. +} if notUsed('a'), notUsed('x') + +replace restart { + popw x + popw x +} by { + addw sp, #4 + ; peephole 41 merged popw x into addw. +} if notUsed('x') + +replace restart { + ld a, %1 + cp a, %2 + jrc %3 + ld a, %1 +} by { + ld a, %1 + cp a, %2 + jrc %3 + ; peephole 42 removed redundant load of a from %1. +} if notVolatile(%1), notUsed('n'), notUsed('z') + +replace restart { + ld a, %1 + cp a, %2 + jrslt %3 + ld a, %1 +} by { + ld a, %1 + cp a, %2 + jrslt %3 + ; peephole 43 removed redundant load of a from %1. +} if notVolatile(%1), notUsed('n'), notUsed('z') + +replace restart { + ld a, %1 + cp a, %2 + jrsle %3 + ld a, %1 +} by { + ld a, %1 + cp a, %2 + jrsle %3 + ; peephole 44 removed redundant load of a from %1. +} if notVolatile(%1), notUsed('n'), notUsed('z') + +replace restart { + ld a, %1 + cp a, %2 + jrule %3 + ld a, %1 +} by { + ld a, %1 + cp a, %2 + jrule %3 + ; peephole 45 removed redundant load of a from %1. +} if notVolatile(%1), notUsed('n'), notUsed('z') + +replace restart { + ldw x, %1 + cpw x, %2 + jrc %3 + ldw x, %1 +} by { + ldw x, %1 + cpw x, %2 + jrc %3 + ; peephole 46 removed redundant load of a from %1. +} if notVolatile(%1), notUsed('n'), notUsed('z') + +replace restart { + ldw x, %1 + cpw x, %2 + jrslt %3 + ldw x, %1 +} by { + ldw x, %1 + cpw x, %2 + jrslt %3 + ; peephole 47 removed redundant load of a from %1. +} if notVolatile(%1), notUsed('n'), notUsed('z') + +replace restart { + ldw x, %1 + cpw x, %2 + jrsle %3 + ldw x, %1 +} by { + ldw x, %1 + cpw x, %2 + jrsle %3 + ; peephole 48 removed redundant load of a from %1. 
+} if notVolatile(%1), notUsed('n'), notUsed('z') + +replace restart { + ldw x, %1 + cpw x, %2 + jrule %3 + ldw x, %1 +} by { + ldw x, %1 + cpw x, %2 + jrule %3 + ; peephole 49 removed redundant load of a from %1. +} if notVolatile(%1), notUsed('n'), notUsed('z') + +replace restart { + ldw x, %1 + cpw x, %2 + jrnc %3 + ldw x, %1 +} by { + ldw x, %1 + cpw x, %2 + jrnc %3 + ; peephole 50 removed redundant load of a from %1. +} if notVolatile(%1), notUsed('n'), notUsed('z') + +replace restart { + ldw x, %1 + jreq %3 + ldw x, %1 +} by { + ldw x, %1 + jreq %3 + ; peephole 50eq removed redundant load of x from %1. +} if notVolatile(%1) + +replace restart { + ldw x, %1 + jrne %3 + ldw x, %1 +} by { + ldw x, %1 + jrne %3 + ; peephole 50ne removed redundant load of x from %1. +} if notVolatile(%1) + +replace restart { + cp a, %1 + jrne %2 + ld a, #0x01 +} by { + sub a, %1 + jrne %2 + inc a + ; peephole 51 used inc to get #1 into a. +} if notUsedFrom(%2 'a') + +replace { + jp %5 + ret +} by { + jp %5 + ; peephole 52 removed unused ret. +} + +replace { + jp %5 + addw sp, %1 + ret +} by { + jp %5 + ; peephole 53 removed unused ret. +} + +replace restart { + jp %5 +} by { + jp %6 + ; peephole j1 jumped to %6 directly instead of via %5. +} if labelIsUncondJump(), notSame(%5 %6), labelRefCountChange(%5 -1), labelRefCountChange(%6 +1) + +replace restart { + jpf %5 +} by { + jpf %6 + ; peephole j1a jumped to %6 directly instead of via %5. +} if labelIsUncondJump(), notSame(%5 %6), labelRefCountChange(%5 -1), labelRefCountChange(%6 +1) + +replace restart { + jp %1 +%1: +} by { +%1: + ; peephole j1b removed redundant jump. +} if labelRefCountChange(%1 -1) + +replace restart { + jpf %1 +%1: +} by { +%1: + ; peephole j1c removed redundant jump. +} if labelRefCountChange(%1 -1) + +replace restart { + jp %1 +%2: +%1: +} by { +%2: +%1: + ; peephole j1d removed redundant jump. 
+} if labelRefCountChange(%1 -1) + +replace restart { + jpf %1 +%2: +%1: +} by { +%2: +%1: + ; peephole j1e removed redundant jump. +} if labelRefCountChange(%1 -1) + +replace restart { + jra %1 +%1: +} by { +%1: + ; peephole j1f removed redundant jump. +} if labelRefCountChange(%1 -1) + +replace restart { + jp %1 + jp %2 +} by { + jp %1 + ; peephole j2a removed unreachable jump to %2. +} if labelRefCountChange(%2 -1) + +replace restart { + jra %1 + jp %2 +} by { + jra %1 + ; peephole j2b removed unreachable jump to %2. +} if labelRefCountChange(%2 -1) + +replace restart { + jp %1 + jra %2 +} by { + jp %1 + ; peephole jc2 removed unreachable jump to %2. +} if labelRefCountChange(%2 -1) + +replace restart { + jra %1 + jra %2 +} by { + jra %1 + ; peephole j2d removed unreachable jump to %2. +} if labelRefCountChange(%2 -1) + +replace restart { + jreq %1 + jreq %2 +} by { + jreq %1 + ; peephole j2d-eq removed unreachable jump to %2. +} if labelRefCountChange(%2 -1) + +replace restart { + jrne %1 + jrne %2 +} by { + jrne %1 + ; peephole j2d-ne removed unreachable jump to %2. +} if labelRefCountChange(%2 -1) + +// Ensure jump-to-jump optimization of absolute jumps above is done before other jump-related optimizations. +barrier + +replace restart { + jp %5 +} by { + ret + ; peephole j2e replaced jump by return. +} if labelIsReturnOnly(%5), labelRefCountChange(%5 -1) + +replace restart { + jpf %5 +} by { + retf + ; peephole j2f replaced jump by return. +} if labelIsReturnOnly(%5), labelRefCountChange(%5 -1) + +replace restart { + ld a, %1 + srl a + btjt %1, #0, %2 +} by { + ld a, %1 + srl a + ; peephole j3 jumped by carry bit instead of testing bit explicitly. + jrc %2 +} + +replace restart { + ld a, %1 + srl a + btjf %1, #0, %2 +} by { + ld a, %1 + srl a + ; peephole j4 jumped by carry bit instead of testing bit explicitly. + jrnc %2 +} + +replace restart { + jp %5 +} by { + jra %5 + ; peephole j5 changed absolute to relative unconditional jump. 
+} if labelInRange(%5) + +replace restart { + jpf %5 +} by { + jra %5 + ; peephole j5a changed absolute to relative unconditional jump. +} if labelInRange(%5) + +replace restart { + jrc %1 + jra %5 +%1: +} by { + jrnc %5 + ; peephole j6 removed jra by using inverse jump logic +%1: +} if labelRefCountChange(%1 -1) + +replace restart { + jreq %1 + jra %5 +%1: +} by { + jrne %5 + ; peephole j7 removed jra by using inverse jump logic +%1: +} if labelRefCountChange(%1 -1) + +replace restart { + jreq %1 + jrne %2 +%1: +} by { + jrne %2 + ; peephole j7-eq-ne replaced jreq-jrne by jrne +%1: +} if labelRefCountChange(%1 -1) + +replace restart { + jrne %1 + jreq %2 +%1: +} by { + jreq %2 + ; peephole j7-ne-eq replaced jrne-jreq by jreq +%1: +} if labelRefCountChange(%1 -1) + +replace restart { + jrmi %1 + jra %5 +%1: +} by { + jrpl %5 + ; peephole j8 removed jra by using inverse jump logic +%1: +} if labelRefCountChange(%1 -1) + +replace restart { + jrnc %1 + jra %5 +%1: +} by { + jrc %5 + ; peephole j9 removed jra by using inverse jump logic +%1: +} if labelRefCountChange(%1 -1) + +replace restart { + jrne %1 + jra %5 +%1: +} by { + jreq %5 + ; peephole j10 removed jra by using inverse jump logic +%1: +} if labelRefCountChange(%1 -1) + +replace restart { + jrpl %1 + jra %5 +%1: +} by { + jrmi %5 + ; peephole j11 removed jra by using inverse jump logic +%1: +} if labelRefCountChange(%1 -1) + +replace restart { + jrsge %1 + jra %5 +%1: +} by { + jrslt %5 + ; peephole j12 removed jra by using inverse jump logic +%1: +} if labelRefCountChange(%1 -1) + +replace restart { + jrsgt %1 + jra %5 +%1: +} by { + jrsle %5 + ; peephole j13 removed jra by using inverse jump logic +%1: +} if labelRefCountChange(%1 -1) + +replace restart { + jrsle %1 + jra %5 +%1: +} by { + jrsgt %5 + ; peephole j14 removed jra by using inverse jump logic +%1: +} if labelRefCountChange(%1 -1) + +replace restart { + jrslt %1 + jra %5 +%1: +} by { + jrsge %5 + ; peephole j15 removed jra by using inverse jump 
logic +%1: +} if labelRefCountChange(%1 -1) + +replace restart { + jrugt %1 + jra %5 +%1: +} by { + jrule %5 + ; peephole j16 removed jra by using inverse jump logic +%1: +} if labelRefCountChange(%1 -1) + +replace restart { + jrule %1 + jra %5 +%1: +} by { + jrugt %5 + ; peephole j17 removed jp by using inverse jump logic +%1: +} if labelRefCountChange(%1 -1) + +replace restart { + jrc %5 +} by { + jrc %6 + ; peephole j18 jumped to %6 directly instead of via %5. +} if labelIsUncondJump(), notSame(%5 %6), labelInRange(%6), labelRefCountChange(%5 -1), labelRefCountChange(%6 +1) + +replace restart { + jreq %5 +} by { + jreq %6 + ; peephole j19 jumped to %6 directly instead of via %5. +} if labelIsUncondJump(), notSame(%5 %6), labelInRange(%6), labelRefCountChange(%5 -1), labelRefCountChange(%6 +1) + +replace restart { + jrmi %5 +} by { + jrmi %6 + ; peephole j20 jumped to %6 directly instead of via %5. +} if labelIsUncondJump(), notSame(%5 %6), labelInRange(%6), labelRefCountChange(%5 -1), labelRefCountChange(%6 +1) + +replace restart { + jrnc %5 +} by { + jrnc %6 + ; peephole j21 jumped to %6 directly instead of via %5. +} if labelIsUncondJump(), notSame(%5 %6), labelInRange(%6), labelRefCountChange(%5 -1), labelRefCountChange(%6 +1) + +replace restart { + jrne %5 +} by { + jrne %6 + ; peephole j22 jumped to %6 directly instead of via %5. +} if labelIsUncondJump(), notSame(%5 %6), labelInRange(%6), labelRefCountChange(%5 -1), labelRefCountChange(%6 +1) + +replace restart { + jrpl %5 +} by { + jrpl %6 + ; peephole j23 jumped to %6 directly instead of via %5. +} if labelIsUncondJump(), notSame(%5 %6), labelInRange(%6), labelRefCountChange(%5 -1), labelRefCountChange(%6 +1) + +replace restart { + jrge %5 +} by { + jrge %6 + ; peephole j24 jumped to %6 directly instead of via %5. 
+} if labelIsUncondJump(), notSame(%5 %6), labelInRange(%6), labelRefCountChange(%5 -1), labelRefCountChange(%6 +1) + +replace restart { + jrgt %5 +} by { + jrgt %6 + ; peephole j25 jumped to %6 directly instead of via %5. +} if labelIsUncondJump(), notSame(%5 %6), labelInRange(%6), labelRefCountChange(%5 -1), labelRefCountChange(%6 +1) + +replace restart { + jrle %5 +} by { + jrle %6 + ; peephole j26 jumped to %6 directly instead of via %5. +} if labelIsUncondJump(), notSame(%5 %6), labelInRange(%6), labelRefCountChange(%5 -1), labelRefCountChange(%6 +1) + +replace restart { + jrlt %5 +} by { + jrlt %6 + ; peephole j27 jumped to %6 directly instead of via %5. +} if labelIsUncondJump(), notSame(%5 %6), labelInRange(%6), labelRefCountChange(%5 -1), labelRefCountChange(%6 +1) + +replace restart { + jrugt %5 +} by { + jrugt %6 + ; peephole j28 jumped to %6 directly instead of via %5. +} if labelIsUncondJump(), notSame(%5 %6), labelInRange(%6), labelRefCountChange(%5 -1), labelRefCountChange(%6 +1) + +replace restart { + jrule %5 +} by { + jrule %6 + ; peephole j29 jumped to %6 directly instead of via %5. +} if labelIsUncondJump(), notSame(%5 %6), labelInRange(%6), labelRefCountChange(%5 -1), labelRefCountChange(%6 +1) + +replace restart { + call %5 +} by { + callr %5 + ; peephole j30 changed absolute to relative call. +} if labelInRange(%5) + +// Should be one of the last ones. Opens the code to further peephole optimization. +replace restart { +%1: +} by { + ; peephole j30 removed unused label %1. +} if labelRefCount(%1 0) + + +replace restart { + ldw x, (%1, sp) + cpw x, #%2 + jrsge %3 + ldw x, #%2 +} by { + ldw x, #%2 + cpw x, (%1, sp) + jrslt %3 + ; peephole j31 removed load by inverting jump condition. +} if notUsedFrom(%3 'x') + +replace restart { + ldw x, (%1, sp) + cpw x, #%2 + jrsle %3 + ldw x, #%2 +} by { + ldw x, #%2 + cpw x, (%1, sp) + jrsgt %3 + ; peephole j32 removed load by inverting jump condition. 
+} if notUsedFrom(%3 'x') + +// Barrier, since notUsed() is better at dealing with ret than with jp to unknown location. +barrier + +replace restart { + call %1 + ret +} by { + jp %1 + ; peephole j31 replaced call at end of function by jump (tail call optimization). +} + +replace restart { + callf %1 + retf +} by { + jpf %1 + ; peephole j31a replaced call at end of function by jump (tail call optimization). +} + +replace restart { + ld xl, a + ld a, xh +} by { + rlwa x + ; peephole r1 used rlwa. +} if notUsed('xh') + +replace restart { + ld yl, a + ld a, yh +} by { + rlwa y + ; peephole r2 used rlwa. +} if notUsed('yh') + +replace restart { + ld xh, a + ld a, xl +} by { + rrwa x + ; peephole r3 used rrwa. +} if notUsed('xl') + +replace restart { + ld yh, a + ld a, yl +} by { + rrwa y + ; peephole r4 used rrwa. +} if notUsed('yl') + +// Barrier, so nothing else ever sees the jump-on-false optimization. +barrier + +// The STM8 has a relative jump-on-false instruction, which never jumps to its target. This can be used to optimize jumps over 1-byte instructions as we can use the instruction we jump over as the offset for the jump. +replace { + jra %5 +%2: + clr a +%5: +} by { + .byte 0x21 + ; peephole jrf1 used jump-on-false opcode to shorten jump over 1-byte instruction. + %2: + clr a + %5: +} if labelRefCountChange(%5 -1) + +replace { + jra %5 +%2: + clrw x +%5: +} by { + .byte 0x21 + ; peephole jrf2 used jump-on-false opcode to shorten jump over 1-byte instruction. + %2: + clrw x + %5: +} if labelRefCountChange(%5 -1) + +replace { + jra %5 +%2: + ld a, xl +%5: +} by { + .byte 0x21 + ; peephole jrf3 used jump-on-false opcode to shorten jump over 1-byte instruction. + %2: + ld a, xl + %5: +} if labelRefCountChange(%5 -1) + +replace { + jrugt %1 + clr a + jra %2 +%1: + ld a, #0x01 +%2: +} by { + jrule %1 + ld a, #0x01 + .byte 0x21 + ; peephole jrf4 used jump-on-false opcode to shorten jump over 1-byte instruction. 
+%1: + clr a +%2: +} if labelRefCount(%1 1), labelRefCountChange(%2 -1) + +replace { + jra %5 +%2: + ldw x, y +%5: +} by { + .byte 0x21 + ; peephole jrf5 used jump-on-false opcode to shorten jump over 1-byte instruction. + %2: + ldw x, y + %5: +} if labelRefCountChange(%5 -1) + +// We don't have an explicit instruction to jump over 2 bytes. But when flags are not used, we can use bcp a, longmem instead. + +// Jumping over ld a, #0xmm: bcp does a read from a memory location at 0xa6mm - which for all current STM8 is in Flash or unused. If a future STM8 places memory-mapped I/O there, we're in trouble. +replace { + jra %5 +%1: + ld a, #%2 +%5: +} by { + .byte 0xc5 + ; peephole jrf6 used bcp opcode to jump over 2-byte instruction. +%1: + ld a, #%2 +%5: +} if notUsed('n'), notUsed('z'), labelRefCountChange(%5 -1) + +// Jumping over clr (mm, sp): bcp does a read from a memory location at 0x0fmm - which for all current STM8 is in RAM or unused. If a future STM8 places memory-mapped I/O there, we're in trouble. +replace { + jra %5 +%1: + clr (%2, sp) +%5: +} by { + .byte 0xc5 + ; peephole jrf7 used bcp opcode to jump over 2-byte instruction. +%1: + clr (%2, sp) +%5: +} if notUsed('n'), notUsed('z'), labelRefCountChange(%5 -1) + +// Jumping over clrw x; incw x: bcp does a read from a memory location at 0x5f5c - which for all current STM8 is unused. If a future STM8 places memory-mapped I/O there, we're in trouble. +replace { + jra %5 +%1: + clrw x + incw x +%5: +} by { + .byte 0xc5 + ; peephole jrf8 used bcp opcode to jump over 2-byte instruction sequence. +%1: + clrw x + incw x +%5: +} if notUsed('n'), notUsed('z'), labelRefCountChange(%5 -1) + +// Jumping over ldw x, (mm, sp): bcp does a read from a memory location at 0x1emm - which for all current STM8 is unused. If a future STM8 places memory-mapped I/O there, we're in trouble. +replace { + jra %5 +%1: + ldw x, (%2, sp) +%5: +} by { + .byte 0xc5 + ; peephole jrf9 used bcp opcode to jump over 2-byte instruction. 
+%1: + ldw x, (%2, sp) +%5: +} if notUsed('n'), notUsed('z'), labelRefCountChange(%5 -1) + +// We don't have an explicit instruction to jump over 3 bytes. But when flags and a are not used, we can use ldf a, extmem instead. + +// Jumping over ldw x, #mmmm ldf does a read from a memory location at 0xaemmmm - which for all current STM8 is unused. If a future STM8 places memory-mapped I/O there, we're in trouble. +replace { + jra %5 +%1: + ldw x, #%2 +%5: +} by { + .byte 0xbc + ; peephole jrf10 used ldf opcode to jump over 3-byte instruction. +%1: + ldw x, #%2 +%5: +} if notUsed('a'), notUsed('n'), notUsed('z'), labelRefCountChange(%5 -1) + +// Jumping over clrw x; ldw (mm, sp), x ldf does a read from a memory location at 0x5f1fmm - which for all current STM8 is unused. If a future STM8 places memory-mapped I/O there, we're in trouble. +replace { + jra %5 +%1: + clrw x + ldw (%2, sp), x +%5: +} by { + .byte 0xbc + ; peephole jrf11 used ldf opcode to jump over 3-byte instruction. +%1: + clrw x + ldw (%2, sp), x +%5: +} if notUsed('a'), notUsed('n'), notUsed('z'), labelRefCountChange(%5 -1) + diff --git a/src/stm8/ralloc.c b/src/stm8/ralloc.c new file mode 100644 index 0000000..21ec727 --- /dev/null +++ b/src/stm8/ralloc.c @@ -0,0 +1,843 @@ +#include "ralloc.h" +#include "gen.h" + +#include "dbuf_string.h" + +extern void genSTM08Code (iCode *); + +reg_info stm8_regs[] = +{ + {REG_GPR, A_IDX, "a"}, + {REG_GPR, XL_IDX, "xl"}, + {REG_GPR, XH_IDX, "xh"}, + {REG_GPR, YL_IDX, "yl"}, + {REG_GPR, YH_IDX, "yh"}, + {REG_CND, C_IDX, "c"}, + {REG_GPR, X_IDX, "x"}, + {REG_GPR, Y_IDX, "y"}, + {0, SP_IDX, "sp"}, +}; + +/* Flags to turn on debugging code. 
+ */ +enum +{ + D_ALLOC = 0, +}; + +/** Local static variables */ +static struct +{ + set *stackSpil; + int slocNum; + int stackExtend; + int dataExtend; +} _G; + +#if 1 +#define D(_a, _s) if (_a) { printf _s; fflush(stdout); } +#else +#define D(_a, _s) +#endif + +/** noOverLap - will iterate through the list looking for overlap + */ +static int +noOverLap (set *itmpStack, symbol *fsym) +{ + symbol *sym; + + for (sym = setFirstItem (itmpStack); sym; sym = setNextItem (itmpStack)) + { + if (bitVectBitValue (sym->clashes, fsym->key)) + return 0; +#if 0 + // if sym starts before (or on) our end point + // and ends after (or on) our start point, + // it is an overlap. + if (sym->liveFrom <= fsym->liveTo && sym->liveTo >= fsym->liveFrom) + { + return 0; + } +#endif + } + return 1; +} + +/*-----------------------------------------------------------------*/ +/* isFreeSTM8 - will return 1 if a free spill location is found */ +/*-----------------------------------------------------------------*/ +DEFSETFUNC (isFreeSTM8) +{ + symbol *sym = item; + V_ARG (symbol **, sloc); + V_ARG (symbol *, fsym); + + /* if already found */ + if (*sloc) + return 0; + + /* if it is free and the itmp assigned to + this does not have any overlapping live ranges + with the one currently being assigned and + the size can be accommodated */ + if (sym->isFree && noOverLap (sym->usl.itmpStack, fsym) && getSize (sym->type) >= getSize (fsym->type)) + { + *sloc = sym; + return 1; + } + + return 0; +} + +/*-----------------------------------------------------------------*/ +/* createStackSpil - create a location on the stack to spill */ +/*-----------------------------------------------------------------*/ +static symbol * +createStackSpil (symbol * sym) +{ + symbol *sloc = NULL; + struct dbuf_s dbuf; + + D (D_ALLOC, ("createStackSpil: for sym %p %s (old currFunc->stack %ld)\n", sym, sym->name, (long)(currFunc->stack))); + + dbuf_init (&dbuf, 128); + dbuf_printf (&dbuf, "sloc%d", _G.slocNum++); + sloc 
= newiTemp (dbuf_c_str (&dbuf)); + dbuf_destroy (&dbuf); + + /* set the type to the spilling symbol */ + sloc->type = copyLinkChain (sym->type); + sloc->etype = getSpec (sloc->type); + SPEC_SCLS (sloc->etype) = S_AUTO; + SPEC_EXTR (sloc->etype) = 0; + SPEC_STAT (sloc->etype) = 0; + SPEC_VOLATILE (sloc->etype) = 0; + + allocLocal (sloc); + + sloc->isref = 1; /* to prevent compiler warning */ + + wassertl (currFunc, "Local variable used outside of function."); + + /* if it is on the stack then update the stack */ + if (IN_STACK (sloc->etype)) + { + if (currFunc) + currFunc->stack += getSize (sloc->type); + _G.stackExtend += getSize (sloc->type); + } + else + { + _G.dataExtend += getSize (sloc->type); + } + + /* add it to the stackSpil set */ + addSetHead (&_G.stackSpil, sloc); + sym->usl.spillLoc = sloc; + sym->stackSpil = 1; + + /* add it to the set of itempStack set + of the spill location */ + addSetHead (&sloc->usl.itmpStack, sym); + + D (D_ALLOC, ("createStackSpil: created new %s\n", sloc->name)); + return sym; +} + +/*-----------------------------------------------------------------*/ +/* spillThis - spils a specific operand */ +/*-----------------------------------------------------------------*/ +void +stm8SpillThis (symbol *sym, bool force_spill) +{ + int i; + + D (D_ALLOC, ("stm8SpillThis: spilling %p (%s)\n", sym, sym->name)); + + /* if this is rematerializable or has a spillLocation + we are okay, else we need to create a spillLocation + for it */ + if (!(sym->remat || sym->usl.spillLoc) || (sym->usl.spillLoc && !sym->usl.spillLoc->onStack)) // stm8 port currently only supports on-stack spill locations in code generation. 
+ createStackSpil (sym); + + /* mark it as spilt */ + sym->isspilt = sym->spillA = 1; + + if (force_spill) + for (i = 0; i < sym->nRegs; i++) + { + if (sym->regs[i]) + sym->regs[i] = 0; + } + + if (sym->usl.spillLoc && !sym->remat) + { + sym->usl.spillLoc->allocreq++; + } + return; +} + +/*-----------------------------------------------------------------*/ +/* regTypeNum - computes the type & number of registers required */ +/*-----------------------------------------------------------------*/ +static void +regTypeNum (void) +{ + symbol *sym; + int k; + + /* for each live range do */ + for (sym = hTabFirstItem (liveRanges, &k); sym; sym = hTabNextItem (liveRanges, &k)) + { + /* if used zero times then no registers needed. Exception: Variables larger than 4 bytes - these might need a spill location when they are return values */ + if ((sym->liveTo - sym->liveFrom) == 0 && getSize (sym->type) <= 4) + continue; + else if ((sym->liveTo - sym->liveFrom) == 0 && bitVectnBitsOn (sym->defs) <= 1) + { + iCode *dic = hTabItemWithKey (iCodehTab, bitVectFirstBit (sym->defs)); + if (!dic || dic->op != CALL && dic->op != PCALL) + continue; + } + + D (D_ALLOC, ("regTypeNum: loop on sym %p\n", sym)); + + /* if the live range is a temporary */ + if (sym->isitmp) + { + /* if the type is marked as a conditional */ + if (sym->regType == REG_CND) + continue; + + /* if used in return only then we don't + need registers */ + if (sym->ruonly || sym->accuse) + { + if (IS_AGGREGATE (sym->type) || sym->isptr) + sym->type = aggrToPtr (sym->type, FALSE); + continue; + } + + /* if not then we require registers */ + D (D_ALLOC, + ("regTypeNum: isagg %u nRegs %u type %p\n", IS_AGGREGATE (sym->type) || sym->isptr, sym->nRegs, sym->type)); + sym->nRegs = + ((IS_AGGREGATE (sym->type) + || sym->isptr) ? 
getSize (sym->type = aggrToPtr (sym->type, FALSE)) : getSize (sym->type)); + D (D_ALLOC, ("regTypeNum: setting nRegs of %s (%p) to %u\n", sym->name, sym, sym->nRegs)); + + D (D_ALLOC, ("regTypeNum: setup to assign regs sym %p\n", sym)); + + if (sym->nRegs > 8) + { + fprintf (stderr, "allocated more than 8 registers for type "); + printTypeChain (sym->type, stderr); + fprintf (stderr, "\n"); + } + + /* determine the type of register required */ + /* Always general purpose */ + sym->regType = REG_GPR; + } + else + { + /* for the first run we don't provide */ + /* registers for true symbols we will */ + /* see how things go */ + D (D_ALLOC, ("regTypeNum: #2 setting num of %p to 0\n", sym)); + sym->nRegs = 0; + } + } +} + +/** Transform weird SDCC handling of writes via pointers + into something more sensible. */ +static void +transformPointerSet (eBBlock **ebbs, int count) +{ + /* for all blocks */ + for (int i = 0; i < count; i++) + { + iCode *ic; + + /* for all instructions do */ + for (ic = ebbs[i]->sch; ic; ic = ic->next) + if (POINTER_SET (ic)) + { + IC_LEFT (ic) = IC_RESULT (ic); + IC_RESULT (ic) = 0; + ic->op = SET_VALUE_AT_ADDRESS; + } + } +} + + +/** Register reduction for assignment. + */ +static int +packRegsForAssign (iCode *ic, eBBlock *ebp) +{ + iCode *dic, *sic; + + if (!IS_ITEMP (IC_RIGHT (ic)) || OP_SYMBOL (IC_RIGHT (ic))->isind || OP_LIVETO (IC_RIGHT (ic)) > ic->seq) + return 0; + + /* Avoid having multiple named address spaces in one iCode. */ + if (IS_SYMOP (IC_RESULT (ic)) && SPEC_ADDRSPACE (OP_SYMBOL (IC_RESULT (ic))->etype)) + return 0; + + /* find the definition of iTempNN scanning backwards if we find a + a use of the true symbol in before we find the definition then + we cannot */ + for (dic = ic->prev; dic; dic = dic->prev) + { + /* PENDING: Don't pack across function calls. 
*/ + if (dic->op == CALL || dic->op == PCALL) + { + dic = NULL; + break; + } + + if (SKIP_IC2 (dic)) + continue; + + if (dic->op == IFX) + { + if (IS_SYMOP (IC_COND (dic)) && + (IC_COND (dic)->key == IC_RESULT (ic)->key || IC_COND (dic)->key == IC_RIGHT (ic)->key)) + { + dic = NULL; + break; + } + } + else + { + if (IS_TRUE_SYMOP (IC_RESULT (dic)) && IS_OP_VOLATILE (IC_RESULT (dic))) + { + dic = NULL; + break; + } + + if (IS_SYMOP (IC_RESULT (dic)) && IC_RESULT (dic)->key == IC_RIGHT (ic)->key) + { + break; + } + + if (IS_SYMOP (IC_RIGHT (dic)) && + (IC_RIGHT (dic)->key == IC_RESULT (ic)->key || IC_RIGHT (dic)->key == IC_RIGHT (ic)->key)) + { + dic = NULL; + break; + } + + if (IS_SYMOP (IC_LEFT (dic)) && + (IC_LEFT (dic)->key == IC_RESULT (ic)->key || IC_LEFT (dic)->key == IC_RIGHT (ic)->key)) + { + dic = NULL; + break; + } + + if (IS_SYMOP (IC_RESULT (dic)) && IC_RESULT (dic)->key == IC_RESULT (ic)->key) + { + dic = NULL; + break; + } + } + } + + if (!dic) + return 0; /* did not find */ + + /* if assignment then check that right is not a bit */ + if (ic->op == '=') + { + sym_link *etype = operandType (IC_RESULT (dic)); + if (IS_BITFIELD (etype)) + { + /* if result is a bit too then it's ok */ + etype = operandType (IC_RESULT (ic)); + if (!IS_BITFIELD (etype)) + { + return 0; + } + } + } + + /* For now eliminate 8-bit temporary variables only. + The STM8 instructions operating directly on memory + operands are 8-bit, so the most benefit is in 8-bit + operations. On the other hand, supporting wider + operations well in codegen is also more effort. */ + if (bitsForType (operandType (IC_RESULT (dic))) > 8 && + !((dic->op == LEFT_OP || dic->op == RIGHT_OP) && IS_OP_LITERAL (IC_RIGHT (dic)) && operandLitValue (IC_RIGHT (dic)) == 1 && // Can do wide shift by 1 in place. 
+ IS_SYMOP (IC_LEFT (dic)) && IS_SYMOP (IC_RESULT (ic)) && OP_SYMBOL (IC_LEFT (dic)) == OP_SYMBOL (IC_RESULT (ic)))) + return 0; + + /* if the result is on stack or iaccess then it must be + the same as at least one of the operands */ + if (OP_SYMBOL (IC_RESULT (ic))->onStack || OP_SYMBOL (IC_RESULT (ic))->iaccess) + { + /* the operation has only one symbol + operator then we can pack */ + if ((IC_LEFT (dic) && !IS_SYMOP (IC_LEFT (dic))) || (IC_RIGHT (dic) && !IS_SYMOP (IC_RIGHT (dic)))) + goto pack; + + if (!((IC_LEFT (dic) && + IC_RESULT (ic)->key == IC_LEFT (dic)->key) || (IC_RIGHT (dic) && IC_RESULT (ic)->key == IC_RIGHT (dic)->key))) + return 0; + } +pack: + /* found the definition */ + + /* delete from liverange table also + delete from all the points in between and the new + one */ + for (sic = dic; sic != ic; sic = sic->next) + { + bitVectUnSetBit (sic->rlive, IC_RESULT (ic)->key); + if (IS_ITEMP (IC_RESULT (dic))) + bitVectSetBit (sic->rlive, IC_RESULT (dic)->key); + } + + /* replace the result with the result of */ + /* this assignment and remove this assignment */ + bitVectUnSetBit (OP_SYMBOL (IC_RESULT (dic))->defs, dic->key); + IC_RESULT (dic) = IC_RESULT (ic); + + if (IS_ITEMP (IC_RESULT (dic)) && OP_SYMBOL (IC_RESULT (dic))->liveFrom > dic->seq) + { + OP_SYMBOL (IC_RESULT (dic))->liveFrom = dic->seq; + } + + remiCodeFromeBBlock (ebp, ic); + // PENDING: Check vs mcs51 + bitVectUnSetBit (OP_SYMBOL (IC_RESULT (ic))->defs, ic->key); + hTabDeleteItem (&iCodehTab, ic->key, ic, DELETE_ITEM, NULL); + OP_DEFS (IC_RESULT (dic)) = bitVectSetBit (OP_DEFS (IC_RESULT (dic)), dic->key); + return 1; +} + +/** Will reduce some registers for single use. 
+ */ +static int +packRegsForOneuse (iCode *ic, operand **opp, eBBlock *ebp) +{ + iCode *dic; + + operand *op = *opp; +//printf("packRegsForOneuse() at ic %d\n", ic->key); + /* if returning a literal then do nothing */ + if (!IS_ITEMP (op)) + return 0; + + /* if rematerializable do nothing */ + if (OP_SYMBOL (op)->remat) + return 0; + + /* this routine will mark the symbol as used in one + instruction use only && if the definition is local + (ie. within the basic block) && has only one definition */ + if (bitVectnBitsOn (OP_USES (op)) != 1 || bitVectnBitsOn (OP_DEFS (op)) != 1) + return 0; + + /* get the definition */ + if (!(dic = hTabItemWithKey (iCodehTab, bitVectFirstBit (OP_DEFS (op))))) + return 0; +//printf("Found dic %d\n", dic->key); + /* found the definition now check if it is local */ + if (dic->seq < ebp->fSeq || dic->seq > ebp->lSeq) + return 0; /* non-local */ + + /* for now handle results from assignments from globals only */ + if (dic->op != '=' || !isOperandGlobal (IC_RIGHT (dic))) + return 0; + /* also make sure the intervenening instructions + don't have any thing in far space */ + for (iCode *nic = dic->next; nic && nic != ic; nic = nic->next) + { + /* if there is an intervening function call then no */ + if (nic->op == CALL || nic->op == PCALL) + return 0; + + if (nic->op == GET_VALUE_AT_ADDRESS || nic->op == SET_VALUE_AT_ADDRESS) + return 0; + + /* if address of & the result is remat, then okay */ + if (nic->op == ADDRESS_OF && OP_SYMBOL (IC_RESULT (nic))->remat) + continue; + + if (IS_OP_VOLATILE (IC_LEFT (nic)) || + IS_OP_VOLATILE (IC_RIGHT (nic)) || + isOperandGlobal (IC_RESULT (nic))) + return 0; + } + + /* Optimize out the assignment */ + *opp = operandFromOperand (IC_RIGHT(dic)); + (*opp)->isaddr = true; + + bitVectUnSetBit (OP_SYMBOL (op)->defs, dic->key); + bitVectUnSetBit (OP_SYMBOL (op)->uses, ic->key); + + if (IS_ITEMP (IC_RESULT (dic)) && OP_SYMBOL (IC_RESULT (dic))->liveFrom > dic->seq) + OP_SYMBOL (IC_RESULT (dic))->liveFrom = 
dic->seq; + + /* delete from liverange table also + delete from all the points in between and the new + one */ + for (iCode *nic = dic; nic != ic; nic = nic->next) + bitVectUnSetBit (nic->rlive, op->key); + + remiCodeFromeBBlock (ebp, dic); + + hTabDeleteItem (&iCodehTab, dic->key, ic, DELETE_ITEM, NULL); + + return 1; +} + +/** Does some transformations to reduce register pressure. + */ +static void +packRegisters (eBBlock * ebp) +{ + iCode *ic; + int change = 0; + + D (D_ALLOC, ("packRegisters: entered.\n")); + + for(;;) + { + change = 0; + /* look for assignments of the form */ + /* iTempNN = TRueSym (someoperation) SomeOperand */ + /* .... */ + /* TrueSym := iTempNN:1 */ + for (ic = ebp->sch; ic; ic = ic->next) + { + /* find assignment of the form TrueSym := iTempNN:1 */ + if (ic->op == '=') + change += packRegsForAssign (ic, ebp); + } + if (!change) + break; + } + + for (ic = ebp->sch; ic; ic = ic->next) + { + D (D_ALLOC, ("packRegisters: looping on ic %p\n", ic)); + + /* Safe: address of a true sym is always constant. 
*/ + /* if this is an itemp & result of a address of a true sym + then mark this as rematerialisable */ + if (ic->op == ADDRESS_OF && + IS_ITEMP (IC_RESULT (ic)) && bitVectnBitsOn (OP_DEFS (IC_RESULT (ic))) == 1 && !IS_PARM (IC_RESULT (ic)) /* The receiving of the parameter is not accounted for in DEFS */ && + IS_TRUE_SYMOP (IC_LEFT (ic)) && !OP_SYMBOL (IC_LEFT (ic))->onStack) + { + OP_SYMBOL (IC_RESULT (ic))->remat = 1; + OP_SYMBOL (IC_RESULT (ic))->rematiCode = ic; + OP_SYMBOL (IC_RESULT (ic))->usl.spillLoc = NULL; + } + + if (ic->op == ADDRESS_OF && IS_ITEMP (IC_RESULT (ic)) && bitVectnBitsOn (OP_DEFS (IC_RESULT (ic))) == 1 && !IS_PARM (IC_RESULT (ic)) && /* Can remat stack locations, but they can currently only be used for pointer read / write */ + IS_TRUE_SYMOP (IC_LEFT (ic)) && OP_SYMBOL (IC_LEFT (ic))->onStack) + { + bool ok = true; + bitVect *uses = bitVectCopy (OP_USES (IC_RESULT (ic))); + for (int bit = bitVectFirstBit (uses); bitVectnBitsOn (uses); bitVectUnSetBit (uses, bit), bit = bitVectFirstBit (uses)) + { + const iCode *uic = hTabItemWithKey (iCodehTab, bit); + wassert (uic); + if (uic->op != SET_VALUE_AT_ADDRESS && uic->op != GET_VALUE_AT_ADDRESS) + { + ok = false; + break; + } + } + + if (ok) + { + OP_SYMBOL (IC_RESULT (ic))->remat = 1; + OP_SYMBOL (IC_RESULT (ic))->rematiCode = ic; + OP_SYMBOL (IC_RESULT (ic))->usl.spillLoc = NULL; + } + + freeBitVect (uses); + } + + /* Safe: just propagates the remat flag */ + /* if straight assignment then carry remat flag if this is the + only definition */ + if (ic->op == '=' && IS_SYMOP (IC_RIGHT (ic)) && OP_SYMBOL (IC_RIGHT (ic))->remat && + !isOperandGlobal (IC_RESULT (ic)) && bitVectnBitsOn (OP_SYMBOL (IC_RESULT (ic))->defs) == 1 && !IS_PARM (IC_RESULT (ic)) && /* The receiving of the paramter is not accounted for in DEFS */ + !OP_SYMBOL (IC_RESULT (ic))->addrtaken) + { + OP_SYMBOL (IC_RESULT (ic))->remat = OP_SYMBOL (IC_RIGHT (ic))->remat; + OP_SYMBOL (IC_RESULT (ic))->rematiCode = OP_SYMBOL (IC_RIGHT 
(ic))->rematiCode; + } + + /* if cast to a generic pointer & the pointer being + cast is remat, then we can remat this cast as well */ + if (ic->op == CAST && + IS_SYMOP (IC_RIGHT (ic)) && OP_SYMBOL (IC_RIGHT (ic))->remat && + !isOperandGlobal (IC_RESULT (ic)) && bitVectnBitsOn (OP_DEFS (IC_RESULT (ic))) == 1 && !IS_PARM (IC_RESULT (ic)) && /* The receiving of the paramter is not accounted for in DEFS */ + !OP_SYMBOL (IC_RESULT (ic))->addrtaken) + { + sym_link *to_type = operandType (IC_LEFT (ic)); + sym_link *from_type = operandType (IC_RIGHT (ic)); + if (IS_GENPTR (to_type) && IS_PTR (from_type)) + { + OP_SYMBOL (IC_RESULT (ic))->remat = 1; + OP_SYMBOL (IC_RESULT (ic))->rematiCode = ic; + OP_SYMBOL (IC_RESULT (ic))->usl.spillLoc = NULL; + } + } + + /* In some cases redundant moves can be eliminated */ + if (ic->op == GET_VALUE_AT_ADDRESS || ic->op == SET_VALUE_AT_ADDRESS || + ic->op == IFX && operandSize (IC_COND (ic)) == 1 || + ic->op == IPUSH && operandSize (IC_LEFT (ic)) == 1) + packRegsForOneuse (ic, &(IC_LEFT (ic)), ebp); + } + +} + +/** + Mark variables for assignment by the register allocator. + */ +static void +serialRegMark (eBBlock ** ebbs, int count) +{ + int i; + short int max_alloc_bytes = SHRT_MAX; // Byte limit. Set this to a low value to pass only few variables to the register allocator. This can be useful for debugging. + + stm8_call_stack_size = 2; // Saving of register to stack temporarily. + + D (D_ALLOC, ("serialRegMark for %s, currFunc->stack %d\n", currFunc->name, currFunc->stack)); + + /* for all blocks */ + for (i = 0; i < count; i++) + { + iCode *ic; + + if (ebbs[i]->noPath && (ebbs[i]->entryLabel != entryLabel && ebbs[i]->entryLabel != returnLabel)) + continue; + + /* for all instructions do */ + for (ic = ebbs[i]->sch; ic; ic = ic->next) + { + if ((ic->op == CALL || ic->op == PCALL) && ic->parmBytes + 5 > stm8_call_stack_size) + { + sym_link *dtype = operandType (IC_LEFT (ic)); + sym_link *ftype = IS_FUNCPTR (dtype) ? 
dtype->next : dtype; + + /* 5 for saving all registers at call site + 2 for big return value */ + stm8_call_stack_size = ic->parmBytes + 5 + 2 * (getSize (ftype->next) > 4); + } + + if (ic->op == IPOP) + wassert (0); + + /* if result is present && is a true symbol */ + if (IC_RESULT (ic) && ic->op != IFX && IS_TRUE_SYMOP (IC_RESULT (ic))) + OP_SYMBOL (IC_RESULT (ic))->allocreq++; + + /* some don't need registers, since there is no result. */ + if (SKIP_IC2 (ic) || + ic->op == JUMPTABLE || ic->op == IFX || ic->op == IPUSH || ic->op == IPOP || ic->op == SET_VALUE_AT_ADDRESS) + continue; + + /* now we need to allocate registers only for the result */ + if (IC_RESULT (ic)) + { + symbol *sym = OP_SYMBOL (IC_RESULT (ic)); + + D (D_ALLOC, ("serialRegAssign: in loop on result %p %s\n", sym, sym->name)); + + if (sym->isspilt && sym->usl.spillLoc) // todo: Remove once remat is supported! + { + sym->usl.spillLoc->allocreq--; + sym->isspilt = FALSE; + } + + /* Make sure any spill location is definately allocated */ + if (sym->isspilt && !sym->remat && sym->usl.spillLoc && !sym->usl.spillLoc->allocreq) + sym->usl.spillLoc->allocreq++; + + /* if it does not need or is spilt + or is already marked for the new allocator + or will not live beyond this instructions */ + if (!sym->nRegs || + sym->isspilt || sym->for_newralloc || sym->liveTo <= ic->seq && (sym->nRegs <= 4 || ic->op != CALL && ic->op != PCALL)) + { + D (D_ALLOC, ("serialRegMark: won't live long enough.\n")); + continue; + } + + if (sym->usl.spillLoc && !sym->usl.spillLoc->_isparm) // I have no idea where these spill locations come from. Sometime two symbols even have the same spill location, whic tends to mess up stack allocation. THose that come from previous iterations in this loop would be okay, but those from outside are a problem. 
+ { + sym->usl.spillLoc = 0; + sym->isspilt = false; + } + + if (sym->nRegs > 4 && ic->op == CALL) // To be allocated to stack due to the way long long return values are handled via a hidden pointer. + { + sym->for_newralloc = 0; + stm8SpillThis (sym, TRUE); + } + else if (max_alloc_bytes >= sym->nRegs) + { + sym->for_newralloc = 1; + max_alloc_bytes -= sym->nRegs; + } + else if (!sym->for_newralloc) + { + stm8SpillThis (sym, TRUE); + printf ("Spilt %s due to byte limit.\n", sym->name); + } + } + } + } +} + +/*------------------------------------------------------------------*/ +/* verifyRegsAssigned - make sure an iTemp is properly initialized; */ +/* it should either have registers or have been spilled. Otherwise, */ +/* there was an uninitialized variable, so just spill this to get */ +/* the operand in a valid state. */ +/*------------------------------------------------------------------*/ +static void +verifyRegsAssigned (operand * op, iCode * ic) +{ + symbol *sym; + int i; + bool completely_in_regs; + + if (!op) + return; + if (!IS_ITEMP (op)) + return; + + sym = OP_SYMBOL (op); + + if (sym->regType == REG_CND) + return; + + if (sym->isspilt && !sym->remat && sym->usl.spillLoc && !sym->usl.spillLoc->allocreq) + sym->usl.spillLoc->allocreq++; + + if (sym->isspilt) + return; + + for(i = 0, completely_in_regs = TRUE; i < sym->nRegs; i++) + if (!sym->regs[i]) + completely_in_regs = FALSE; + if (completely_in_regs) + return; + + stm8SpillThis (sym, FALSE); +} + +void +stm8RegFix (eBBlock ** ebbs, int count) +{ + int i; + + /* Check for and fix any problems with uninitialized operands */ + for (i = 0; i < count; i++) + { + iCode *ic; + + if (ebbs[i]->noPath && (ebbs[i]->entryLabel != entryLabel && ebbs[i]->entryLabel != returnLabel)) + continue; + + for (ic = ebbs[i]->sch; ic; ic = ic->next) + { + if (SKIP_IC2 (ic)) + continue; + + if (ic->op == IFX) + { + verifyRegsAssigned (IC_COND (ic), ic); + continue; + } + + if (ic->op == JUMPTABLE) + { + verifyRegsAssigned 
(IC_JTCOND (ic), ic); + continue; + } + + verifyRegsAssigned (IC_RESULT (ic), ic); + verifyRegsAssigned (IC_LEFT (ic), ic); + verifyRegsAssigned (IC_RIGHT (ic), ic); + } + } +} + +void stm8_init_asmops (void); + +/*-----------------------------------------------------------------*/ +/* assignRegisters - assigns registers to each live range as need */ +/*-----------------------------------------------------------------*/ +void +stm8_assignRegisters (ebbIndex *ebbi) +{ + eBBlock **ebbs = ebbi->bbOrder; + int count = ebbi->count; + iCode *ic; + + stm8_init_asmops(); + + transformPointerSet (ebbs, count); + + /* change assignments this will remove some + live ranges reducing some register pressure */ + for (int i = 0; i < count; i++) + packRegisters (ebbs[i]); + + /* liveranges probably changed by register packing + so we compute them again */ + recomputeLiveRanges (ebbs, count, FALSE); + + if (options.dump_i_code) + dumpEbbsToFileExt (DUMP_PACK, ebbi); + + /* first determine for each live range the number of + registers & the type of registers required for each */ + regTypeNum (); + + /* Mark variables for assignment by the new allocator */ + serialRegMark (ebbs, count); + + stm8_extend_stack = stm8_call_stack_size > 255; + + /* Invoke optimal register allocator */ + ic = stm8_ralloc2_cc (ebbi); + + /* redo the offsets for stacked automatic variables */ + if (currFunc && !stm8_extend_stack && currFunc->stack + stm8_call_stack_size > 255) + { + _G.slocNum = 0; + + /* Mark variables for assignment by the new allocator */ + serialRegMark (ebbs, count); + + stm8_extend_stack = TRUE; + + /* Invoke optimal register allocator */ + ic = stm8_ralloc2_cc (ebbi); + + //redoStackOffsets (); + } + + if (options.dump_i_code) + { + dumpEbbsToFileExt (DUMP_RASSGN, ebbi); + dumpLiveRanges (DUMP_LRANGE, liveRanges); + } + + genSTM8Code (ic); + + _G.slocNum = 0; +} + diff --git a/src/stm8/ralloc.h b/src/stm8/ralloc.h new file mode 100644 index 0000000..0b7d1ed --- /dev/null +++ 
b/src/stm8/ralloc.h @@ -0,0 +1,70 @@ +/*------------------------------------------------------------------------- + + SDCCralloc.h - header file register allocation + + Written By - Philipp Krause . pkk@spth.de (2012) + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + In other words, you are welcome to use, share and improve this program. + You are forbidden to forbid anyone else to use, share and improve + what you give them. Help stamp out software-hoarding! +-------------------------------------------------------------------------*/ + +#ifndef SDCCRALLOC_H +#define SDCCRALLOC_H 1 + +#include "common.h" + +enum +{ + A_IDX = 0, // The accumulator + XL_IDX, // Lower byte of X + XH_IDX, // Upper byte of X + YL_IDX, // Lower byte of Y + YH_IDX, // Upper byte of Y + C_IDX, // Carry bit - for bool only. + + X_IDX, // X - for use with code generation support functions only. + Y_IDX, // Y - for use with code generation support functions only. + + SP_IDX // SP - for use with debug info. 
/* Description of a single register. */
typedef struct reg_info
{
  short type;                   /* register class: REG_GPR or REG_CND
                                   (this header defines no REG_PTR) */
  short rIdx;                   /* index into register table
                                   (presumably one of the *_IDX values
                                   of the enum in this header - verify) */
  char *name;                   /* name */
} reg_info;
+ +#include "SDCCralloc.hpp" +#include "SDCCsalloc.hpp" + +extern "C" +{ + #include "ralloc.h" + #include "gen.h" + float drySTM8iCode (iCode *ic); + bool stm8_assignment_optimal; + long int stm8_call_stack_size; + bool stm8_extend_stack; +} + +#define REG_A 0 +#define REG_XL 1 +#define REG_XH 2 +#define REG_YL 3 +#define REG_YH 4 +#define REG_C 5 + +template <class I_t> +static void add_operand_conflicts_in_node(const cfg_node &n, I_t &I) +{ + const iCode *ic = n.ic; + + const operand *result = IC_RESULT(ic); + const operand *left = IC_LEFT(ic); + const operand *right = IC_RIGHT(ic); + + if(!result || !IS_SYMOP(result)) + return; + + // Todo: More fine-grained control for these. + if (!(ic->op == '+' || ic->op == '-' || ic->op == UNARYMINUS && !IS_FLOAT (operandType (left)) || ic->op == '~' || + ic->op == '^' || ic->op == '|' || ic->op == BITWISEAND || + ic->op == GET_VALUE_AT_ADDRESS)) + return; + + operand_map_t::const_iterator oir, oir_end, oirs; + boost::tie(oir, oir_end) = n.operands.equal_range(OP_SYMBOL_CONST(result)->key); + if(oir == oir_end) + return; + + operand_map_t::const_iterator oio, oio_end; + + if(left && IS_SYMOP(left)) + for(boost::tie(oio, oio_end) = n.operands.equal_range(OP_SYMBOL_CONST(left)->key); oio != oio_end; ++oio) + for(oirs = oir; oirs != oir_end; ++oirs) + { + var_t rvar = oirs->second; + var_t ovar = oio->second; + if(I[rvar].byte < I[ovar].byte) + boost::add_edge(rvar, ovar, I); + } + + if(right && IS_SYMOP(right)) + for(boost::tie(oio, oio_end) = n.operands.equal_range(OP_SYMBOL_CONST(right)->key); oio != oio_end; ++oio) + for(oirs = oir; oirs != oir_end; ++oirs) + { + var_t rvar = oirs->second; + var_t ovar = oio->second; + if(I[rvar].byte < I[ovar].byte) + boost::add_edge(rvar, ovar, I); + } +} + +// Return true, iff the operand is placed (partially) in r. 
+template <class G_t> +static bool operand_in_reg(const operand *o, reg_t r, const i_assignment_t &ia, unsigned short int i, const G_t &G) +{ + if(!o || !IS_SYMOP(o)) + return(false); + + if(r >= port->num_regs) + return(false); + + operand_map_t::const_iterator oi, oi_end; + for(boost::tie(oi, oi_end) = G[i].operands.equal_range(OP_SYMBOL_CONST(o)->key); oi != oi_end; ++oi) + if(oi->second == ia.registers[r][1] || oi->second == ia.registers[r][0]) + return(true); + + return(false); +} + +template <class G_t, class I_t> +static bool Ainst_ok(const assignment &a, unsigned short int i, const G_t &G, const I_t &I) +{ + const iCode *ic = G[i].ic; + const operand *const left = IC_LEFT(ic); + + const i_assignment_t &ia = a.i_assignment; + + if(ia.registers[REG_A][1] < 0) + return(true); // Register a not in use. + + if(ic->op == IPUSH) + { + if (ia.registers[REG_XL][1] < 0 || ia.registers[REG_YL][1] < 0 && !stm8_extend_stack) + return(true); // Register xl or yl free; code generation can use them when a is not available. + + // push a does not disturb a. + if (getSize(operandType(IC_LEFT(ic))) <= 1 && operand_in_reg(left, REG_A, ia, i, G)) + return(true); + + // push #byte does not disturb a. + if (IS_OP_LITERAL(left)) + return(true); + + // push longmem does not disturb a. + if (IS_OP_GLOBAL(left)) + return(true); + + // Only look at itemp pushes below. + if (!IS_ITEMP(left)) + return(false); + + // Register pushes do not disturb a. 
+ for (unsigned short i = 0; i < getSize(operandType(IC_LEFT(ic)));) + { + if(operand_in_reg(left, REG_A, ia, i, G)) + i++; + else if(operand_in_reg(left, REG_XL, ia, i, G) && operand_in_reg(left, REG_XH, ia, i + 1, G)) + i += 2; + else if(operand_in_reg(left, REG_YL, ia, i, G) && operand_in_reg(left, REG_YH, ia, i + 1, G)) + i += 2; + else if(operand_in_reg(left, REG_XL, ia, i, G) || operand_in_reg(left, REG_YL, ia, i, G)) + i++; + else + return(false); + } + + return(true); + } + + return(true); +} + +template <class G_t, class I_t> +static bool Yinst_ok(const assignment &a, unsigned short int i, const G_t &G, const I_t &I) +{ + const iCode *ic = G[i].ic; + const operand *const left = IC_LEFT(ic); + + const i_assignment_t &ia = a.i_assignment; + + if(!stm8_extend_stack) + return(true); // Only an extended stack can make Y unavailable. + + if(ia.registers[REG_YL][1] < 0 && ia.registers[REG_YH][1] < 0) + return(true); // Register Y not in use. + + return(false); +} + +template <class G_t, class I_t> +static void set_surviving_regs(const assignment &a, unsigned short int i, const G_t &G, const I_t &I) +{ + iCode *ic = G[i].ic; + + bitVectClear(ic->rMask); + bitVectClear(ic->rSurv); + + cfg_alive_t::const_iterator v, v_end; + for (v = G[i].alive.begin(), v_end = G[i].alive.end(); v != v_end; ++v) + { + if(a.global[*v] < 0) + continue; + ic->rMask = bitVectSetBit(ic->rMask, a.global[*v]); + + if(!(IC_RESULT(ic) && IS_SYMOP(IC_RESULT(ic)) && OP_SYMBOL_CONST(IC_RESULT(ic))->key == I[*v].v)) + if(G[i].dying.find(*v) == G[i].dying.end()) + ic->rSurv = bitVectSetBit(ic->rSurv, a.global[*v]); + } +} + +template <class G_t, class I_t> +static void assign_operand_for_cost(operand *o, const assignment &a, unsigned short int i, const G_t &G, const I_t &I) +{ + if(!o || !IS_SYMOP(o)) + return; + symbol *sym = OP_SYMBOL(o); + operand_map_t::const_iterator oi, oi_end; + for(boost::tie(oi, oi_end) = G[i].operands.equal_range(OP_SYMBOL_CONST(o)->key); oi != oi_end; ++oi) + { + var_t 
v = oi->second; + if(a.global[v] >= 0) + { + sym->regs[I[v].byte] = stm8_regs + a.global[v]; + sym->nRegs = I[v].size; + } + else + { + sym->regs[I[v].byte] = 0; + sym->nRegs = I[v].size; + } + } +} + +template <class G_t, class I_t> +static void assign_operands_for_cost(const assignment &a, unsigned short int i, const G_t &G, const I_t &I) +{ + const iCode *ic = G[i].ic; + + if(ic->op == IFX) + assign_operand_for_cost(IC_COND(ic), a, i, G, I); + else if(ic->op == JUMPTABLE) + assign_operand_for_cost(IC_JTCOND(ic), a, i, G, I); + else + { + assign_operand_for_cost(IC_LEFT(ic), a, i, G, I); + assign_operand_for_cost(IC_RIGHT(ic), a, i, G, I); + assign_operand_for_cost(IC_RESULT(ic), a, i, G, I); + } + + if(ic->op == SEND && ic->builtinSEND) + assign_operands_for_cost(a, (unsigned short)*(adjacent_vertices(i, G).first), G, I); +} + +template <class G_t, class I_t> +static bool operand_sane(const operand *o, const assignment &a, unsigned short int i, const G_t &G, const I_t &I) +{ +#if 0 + int v, byteregs[8]; // Todo: Change this when sdcc supports variables larger than 8 bytes. + unsigned short int size; + + if(!o || !IS_SYMOP(o)) + return(true); + + operand_map_t::const_iterator oi, oi_end; + boost::tie(oi, oi_end) = G[i].operands.equal_range(OP_SYMBOL_CONST(o)->key); + + if(oi == oi_end) + return(true); + + // Ensure: Fully in registers or fully in mem. + if(a.local.find(oi->second) != a.local.end()) + { + while(++oi != oi_end) + if(a.local.find(oi->second) == a.local.end()) + return(false); + } + else + { + while(++oi != oi_end) + if(a.local.find(oi->second) != a.local.end()) + return(false); + } + + boost::tie(oi, oi_end) = G[i].operands.equal_range(OP_SYMBOL_CONST(o)->key); + v = oi->second; + byteregs[I[v].byte] = a.global[v]; + size = 1; + while(++oi != oi_end) + { + v = oi->second; + byteregs[I[v].byte] = a.global[v]; + size++; + } + + if (byteregs[0] == -1) + return(true); + + // Ensure: 8 bit only in A, 16 bit only in X or Y. 
+ if (size == 1) + return(byteregs[0] == A_IDX); + if (size == 2) + return(byteregs[0] == XL_IDX && byteregs[1] == XH_IDX || byteregs[0] == YL_IDX && byteregs[1] == YH_IDX); + if (size > 2) + return(false); +#endif + + return(true); +} + +template <class G_t, class I_t> +static bool inst_sane(const assignment &a, unsigned short int i, const G_t &G, const I_t &I) +{ + const iCode *ic = G[i].ic; + + return(operand_sane(IC_RESULT(ic), a, i, G, I) && operand_sane(IC_LEFT(ic), a, i, G, I) && operand_sane(IC_RIGHT(ic), a, i, G, I)); +} + +// Cost function. +template <class G_t, class I_t> +static float instruction_cost(const assignment &a, unsigned short int i, const G_t &G, const I_t &I) +{ + iCode *ic = G[i].ic; + float c; + + wassert(TARGET_IS_STM8); + wassert(ic); + + if(!inst_sane(a, i, G, I)) + return(std::numeric_limits<float>::infinity()); + +#if 0 + std::cout << "Calculating at cost at ic " << ic->key << ", op " << ic->op << " for: "; + print_assignment(a); + std::cout << "\n"; + std::cout.flush(); +#endif + + if(ic->generated) + { +#if 0 + std::cout << "Skipping, already generated.\n"; +#endif + return(0.0f); + } + + if(!Ainst_ok(a, i, G, I)) + return(std::numeric_limits<float>::infinity()); + + if(!Yinst_ok(a, i, G, I)) + return(std::numeric_limits<float>::infinity()); + + switch(ic->op) + { + // Register assignment doesn't matter for these: + case FUNCTION: + case ENDFUNCTION: + case LABEL: + case GOTO: + case INLINEASM: +#if 0 + std::cout << "Skipping, indepent from assignment.\n"; +#endif + return(0.0f); + case '!': + case '~': + case UNARYMINUS: + case '+': + case '-': + case '^': + case '|': + case BITWISEAND: + case IPUSH: + //case IPOP: + case CALL: + case PCALL: + case RETURN: + case '*': + case '/': + case '%': + case '>': + case '<': + case LE_OP: + case GE_OP: + case EQ_OP: + case NE_OP: + case AND_OP: + case OR_OP: + case GETABIT: + case GETBYTE: + case GETWORD: + case LEFT_OP: + case RIGHT_OP: + case GET_VALUE_AT_ADDRESS: + case 
SET_VALUE_AT_ADDRESS:
+    case '=':
+    case IFX:
+    case ADDRESS_OF:
+    case JUMPTABLE:
+    case CAST:
+    /*case RECEIVE:
+    case SEND:*/
+    case DUMMY_READ_VOLATILE:
+    /*case CRITICAL:
+    case ENDCRITICAL:*/
+    case SWAP:
+      assign_operands_for_cost(a, i, G, I);
+      set_surviving_regs(a, i, G, I);
+      c = drySTM8iCode(ic);
+      ic->generated = false;
+#if 0
+      std::cout << "Got cost " << c << "\n";
+#endif
+      return(c);
+    default:
+      return(0.0f);
+    }
+}
+
+// For early removal of assignments that cannot be extended to valid assignments. This is just a dummy for now.
+// None of the parameters is read; the function always returns false.
+template <class G_t, class I_t>
+static bool assignment_hopeless(const assignment &a, unsigned short int i, const G_t &G, const I_t &I, const var_t lastvar)
+{
+  return(false);
+}
+
+// Increase chance of finding good compatible assignments at join nodes.
+// Copies the first assignment stored at tree node t into a, then replaces a.local by the union of
+// T[t].alive and the local set that assignment already had.
+// NOTE(review): assignments.begin() is dereferenced without an emptiness check - confirm callers guarantee
+// at least one assignment at t.
+template <class T_t>
+static void get_best_local_assignment_biased(assignment &a, typename boost::graph_traits<T_t>::vertex_descriptor t, const T_t &T)
+{
+  a = *T[t].assignments.begin();
+
+  // vi and vi_end are declared but never used in this function.
+  std::set<var_t>::const_iterator vi, vi_end;
+  varset_t newlocal;
+  std::set_union(T[t].alive.begin(), T[t].alive.end(), a.local.begin(), a.local.end(), std::inserter(newlocal, newlocal.end()));
+  a.local = newlocal;
+}
+
+// Suggest to honor register keyword and to not reverse bytes and prefer use of a. Prefer x over y.
+// Returns a heuristic penalty for assignment a, built only from a.i_assignment, a.local, a.global and I;
+// the parameters i and G are not read. Penalty terms:
+//   +0.05  when ia.registers[REG_A][1] is negative,
+//   +0.1   per local variable with a negative a.global entry whose symbol is not marked remat,
+//   +4.0   per local variable with a negative a.global entry whose type satisfies IS_REGISTER,
+//   +0.1   per local variable whose odd byte sits in REG_XL/REG_YL or whose even byte sits in REG_XH/REG_YH.
+// NOTE(review): the header line mentions preferring x over y, but no term below distinguishes X from Y.
+// NOTE(review): sym is dereferenced without a null check - presumably every I[*v].v has a liveRanges entry; confirm.
+template <class G_t, class I_t>
+static float rough_cost_estimate(const assignment &a, unsigned short int i, const G_t &G, const I_t &I)
+{
+  const i_assignment_t &ia = a.i_assignment;
+  float c = 0.0f;
+
+  if(ia.registers[REG_A][1] < 0)
+    c += 0.05f;
+
+  varset_t::const_iterator v, v_end;
+  for(v = a.local.begin(), v_end = a.local.end(); v != v_end; ++v)
+    {
+      const symbol *const sym = (symbol *)(hTabItemWithKey(liveRanges, I[*v].v));
+      if(a.global[*v] < 0 && !sym->remat) // Try to put non-rematerializeable variables into registers.
+        c += 0.1f;
+      if(a.global[*v] < 0 && IS_REGISTER(sym->type)) // Try to honour register keyword.
+        c += 4.0f;
+      if((I[*v].byte % 2) ? // Try not to reverse bytes.
+        (a.global[*v] == REG_XL || a.global[*v] == REG_YL) :
+        (a.global[*v] == REG_XH || a.global[*v] == REG_YH))
+        c += 0.1f;
+    }
+
+  return(c);
+}
+
+// Code for another ic is generated when generating this one. Mark the other as generated.
+// Applies to the comparison operators, to BITWISEAND with a literal operand and to GETABIT. When ifxForOp()
+// returns an iCode for the result operand, the result symbol gets for_newralloc = false and regType = REG_CND,
+// and that iCode is flagged as generated. In every other case nothing is modified.
+static void extra_ic_generated(iCode *ic)
+{
+  // && binds tighter than ||, so the literal-operand test below qualifies the BITWISEAND alternative only.
+  if(ic->op == '>' || ic->op == '<' || ic->op == LE_OP || ic->op == GE_OP || ic->op == EQ_OP || ic->op == NE_OP ||
+    ic->op == BITWISEAND && (IS_OP_LITERAL (IC_LEFT (ic)) || IS_OP_LITERAL (IC_RIGHT (ic))) || ic->op == GETABIT)
+    {
+      iCode *ifx;
+
+      // Bitwise and code generation can only do the jump if one operand is a literal with at most one nonzero byte.
+      if (ic->op == BITWISEAND && getSize(operandType(IC_RESULT(ic))) > 1)
+        {
+          int nonzero = 0;
+          operand *const litop = IS_OP_LITERAL (IC_LEFT (ic)) ? IC_LEFT (ic) : IC_RIGHT (ic);
+
+          // Count nonzero literal bytes, looking only at bytes present in left, right and result alike.
+          for(unsigned int i = 0; i < getSize(operandType(IC_LEFT (ic))) && i < getSize(operandType(IC_RIGHT (ic))) && i < getSize(operandType(IC_RESULT(ic))); i++)
+            if(byteOfVal (OP_VALUE (litop), i))
+              nonzero++;
+
+          if(nonzero > 1)
+            return;
+        }
+      if (ic->op == GETABIT)
+        {
+          unsigned bit = byteOfVal (OP_VALUE (IC_RIGHT (ic)), 0);
+
+          // Only a bit index with remainder 7 modulo 8 (the top bit of a byte) is accepted.
+          if (bit % 8 != 7)
+            return;
+        }
+
+      // ifx is assigned inside the condition; the branch is taken when ifxForOp() returned non-null.
+      if (ifx = ifxForOp (IC_RESULT (ic), ic))
+        {
+          OP_SYMBOL (IC_RESULT (ic))->for_newralloc = false;
+          OP_SYMBOL (IC_RESULT (ic))->regType = REG_CND;
+          ifx->generated = true;
+        }
+    }
+}
+
+// Runs tree_dec_ralloc_nodes() from the root of T and writes the first assignment found at the root back
+// into the symbols looked up in liveRanges.
+//   T  - tree decomposition; its root's assignments are read after tree_dec_ralloc_nodes() returns.
+//   G  - graph whose vertices are passed one by one to set_surviving_regs() (the caller in this file
+//        passes its control-flow graph).
+//   I  - conflict graph; copied into a con2_t (vertex fields v, byte, size, name and all edges) first.
+//   SI - handed to set_spilt() together with G and I.
+// Returns the negation of assignment_optimal, a flag that starts as true and is passed by pointer to
+// tree_dec_ralloc_nodes(). Calls exit(-1) when the winning assignment does not hold one entry per vertex of I.
+template <class T_t, class G_t, class I_t, class SI_t>
+static bool tree_dec_ralloc(T_t &T, G_t &G, const I_t &I, SI_t &SI)
+{
+  bool assignment_optimal;
+
+  con2_t I2(boost::num_vertices(I));
+  for(unsigned int i = 0; i < boost::num_vertices(I); i++)
+    {
+      I2[i].v = I[i].v;
+      I2[i].byte = I[i].byte;
+      I2[i].size = I[i].size;
+      I2[i].name = I[i].name;
+    }
+  typename boost::graph_traits<I_t>::edge_iterator e, e_end;
+  for(boost::tie(e, e_end) = boost::edges(I); e != e_end; ++e)
+    add_edge(boost::source(*e, I), boost::target(*e, I), I2);
+
+  assignment ac;
+  assignment_optimal = true;
+  tree_dec_ralloc_nodes(T, find_root(T), G, I2, ac, &assignment_optimal);
+
+  // NOTE(review): begin() is dereferenced here, before the "No Assignments at root" check further down;
+  // if the root's assignment list can be empty, this dereference comes first - confirm.
+  const assignment &winner = *(T[find_root(T)].assignments.begin());
+
+#ifdef DEBUG_RALLOC_DEC
+  std::cout << "Winner: ";
+  for(unsigned int i = 0; i < boost::num_vertices(I); i++)
+    {
+      std::cout << "(" << i << ", " << int(winner.global[i]) << ") ";
+    }
+  std::cout << "\n";
+  std::cout << "Cost: " << winner.s << "\n";
+  std::cout.flush();
+#endif
+
+  // Todo: Make this an assertion
+  if(winner.global.size() != boost::num_vertices(I))
+    {
+      std::cerr << "ERROR: No Assignments at root\n";
+      exit(-1);
+    }
+
+  // One conflict-graph vertex per variable byte: a non-negative entry selects stm8_regs[entry],
+  // a negative entry clears the register slot and spills the whole symbol.
+  for(unsigned int v = 0; v < boost::num_vertices(I); v++)
+    {
+      symbol *sym = (symbol *)(hTabItemWithKey(liveRanges, I[v].v));
+      bool spilt = false;
+
+      if(winner.global[v] >= 0)
+        sym->regs[I[v].byte] = stm8_regs + winner.global[v];
+      else
+        {
+          sym->regs[I[v].byte] = 0;
+          spilt = true;
+        }
+
+      if(spilt)
+        stm8SpillThis(sym, true);
+
+      sym->nRegs = I[v].size;
+    }
+
+  for(unsigned int i = 0; i < boost::num_vertices(G); i++)
+    set_surviving_regs(winner, i, G, I);
+
+  set_spilt(G, I, SI);
+
+  return(!assignment_optimal);
+}
+
+// Non-static driver for the functions above, working on the blocks indexed by ebbi.
+// Steps, in order: create_cfg() fills the control-flow and conflict graphs; get_nice_tree_decomposition()
+// builds the tree decomposition and alive_tree_dec() is run on it; good_re_root() and nicify() are applied
+// and alive_tree_dec() is run again; guessCounts() is called; tree_dec_ralloc() performs the allocation;
+// stm8RegFix() and chaitin_salloc() run afterwards. Each graph is dumped when options.dump_graphs is set.
+// Side effect: stm8_assignment_optimal receives the negated return value of tree_dec_ralloc().
+// Returns the iCode pointer obtained from create_cfg().
+iCode *stm8_ralloc2_cc(ebbIndex *ebbi)
+{
+  eBBlock **const ebbs = ebbi->bbOrder;
+  const int count = ebbi->count;
+
+#ifdef DEBUG_RALLOC_DEC
+  std::cout << "Processing " << currFunc->name << " from " << dstFileName << "\n"; std::cout.flush();
+#endif
+
+  cfg_t control_flow_graph;
+
+  con_t conflict_graph;
+
+  iCode *ic = create_cfg(control_flow_graph, conflict_graph, ebbi);
+
+  if(options.dump_graphs)
+    dump_cfg(control_flow_graph);
+
+  if(options.dump_graphs)
+    dump_con(conflict_graph);
+
+  tree_dec_t tree_decomposition;
+
+  get_nice_tree_decomposition(tree_decomposition, control_flow_graph);
+
+  alive_tree_dec(tree_decomposition, control_flow_graph);
+
+  good_re_root(tree_decomposition);
+  nicify(tree_decomposition);
+  alive_tree_dec(tree_decomposition, control_flow_graph);
+
+  if(options.dump_graphs)
+    dump_tree_decomposition(tree_decomposition);
+
+  guessCounts (ic, ebbi);
+
+  scon_t stack_conflict_graph;
+
+  // tree_dec_ralloc() returns "not optimal", hence the negation.
+  stm8_assignment_optimal = !tree_dec_ralloc(tree_decomposition, control_flow_graph, conflict_graph, stack_conflict_graph);
+
+  stm8RegFix (ebbs, count);
+
+  chaitin_salloc(stack_conflict_graph);
+
+  if(options.dump_graphs)
+    dump_scon(stack_conflict_graph);
+
+  return(ic);
+}
+
diff --git a/src/stm8/stm8.vcxproj b/src/stm8/stm8.vcxproj
new file mode 100644
index 0000000..830d2c6
--- /dev/null
+++ b/src/stm8/stm8.vcxproj
@@ -0,0 +1,114 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <!-- Custom build step for peeph.def: runs gawk with ../SDCCpeeph.awk on the file and redirects the
+      output to peeph.rul. The command, message and declared output are repeated unchanged for the
+      Debug|Win32 and Release|Win32 configurations. Requires gawk to be reachable from the build. -->
+ <ItemGroup>
+ <CustomBuild Include="peeph.def">
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gawk -f ../SDCCpeeph.awk %(Identity) >peeph.rul</Command>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gawk -f ../SDCCpeeph.awk %(Identity) >peeph.rul</Command>
+ <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Generating Peephole Rule: peeph.rul</Message>
+ <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Generating Peephole Rule: peeph.rul</Message>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">peeph.rul;%(Outputs)</Outputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">peeph.rul;%(Outputs)</Outputs>
+ </CustomBuild>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="..\common.h" />
+ <ClInclude Include="gen.h" />
+ <ClInclude Include="peep.h" />
+ <ClInclude Include="ralloc.h" />
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="gen.c" />
+ <ClCompile Include="main.c" />
+ <ClCompile Include="peep.c" />
+ <ClCompile Include="ralloc.c" />
+ <ClCompile Include="ralloc2.cc" />
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{0BA12B9F-BCD6-4C08-9992-69B4FB32D335}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>stm8</RootNamespace>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <!-- Debug|Win32: the project is built as a static library, with debug libraries and the MultiByte character set. -->
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <!-- Release|Win32: static library as well, without debug libraries, with whole program optimization
+      enabled and the MultiByte character set. -->
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="..\..\SDCC.props" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="..\..\SDCC.props" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <OutDir>$(Configuration)\</OutDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <TargetName>port</TargetName>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <OutDir>$(Configuration)\</OutDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <TargetName>port</TargetName>
+ </PropertyGroup>
+ <!-- Debug|Win32 compiler and linker settings: warning level 3, optimization disabled, WIN32/_DEBUG/_LIB
+      defined, the MultiThreadedDebug runtime, and include paths covering the parent directory, this
+      directory, the directory two levels up and its support\util subdirectory. The PrecompiledHeader
+      element is left empty.
+      NOTE(review): ConfigurationType is StaticLibrary for this configuration; confirm whether the Link
+      settings below have any effect. -->
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..;.;..\..;..\..\support\util;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+ </ClCompile>
+ <Link>
+ <SubSystem>Windows</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <!-- Release|Win32 compiler and linker settings: warning level 3, MaxSpeed optimization with function-level
+      linking and intrinsic functions, WIN32/NDEBUG/_LIB defined, the MultiThreaded runtime, and the same
+      include paths as the Debug configuration. The linker section additionally turns on COMDAT folding and
+      reference optimization.
+      NOTE(review): ConfigurationType is StaticLibrary for this configuration; confirm whether the Link
+      settings below have any effect. -->
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..;.;..\..;..\..\support\util;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ </ClCompile>
+ <Link>
+ <SubSystem>Windows</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ </Link>
+ </ItemDefinitionGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file diff --git a/src/stm8/stm8.vcxproj.filters b/src/stm8/stm8.vcxproj.filters new file mode 100644 index 0000000..2b6aad8 --- /dev/null +++ b/src/stm8/stm8.vcxproj.filters @@ -0,0 +1,52 @@ +<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <!-- Declares the three filters that the items further down are assigned to: "Source Files" and
+      "Header Files" each carry a list of file extensions, "Custom Build" carries none. -->
+ <ItemGroup>
+ <Filter Include="Source Files">
+ <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+ <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+ </Filter>
+ <Filter Include="Header Files">
+ <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+ <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+ </Filter>
+ <Filter Include="Custom Build">
+ <UniqueIdentifier>{06d2760c-63da-486c-b9b7-ee18894e32a7}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="ralloc.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="gen.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="..\common.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="peep.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="ralloc.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="gen.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="main.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ralloc2.cc">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="peep.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ </ItemGroup>
+ <ItemGroup>
+ <CustomBuild Include="peeph.def">
+ <Filter>Custom Build</Filter>
+ </CustomBuild>
+ </ItemGroup>
+</Project>
\ No newline at end of file |
