Integer arithmetic instructions



write me

S(x): bit 31 of x for 32-bit x, bit 15 for 16-bit x (i.e. the sign bit).
SEX(x): sign-extension of x
ZEX(x): zero-extension of x

Addition/subtraction: (h)add, (h)sub, (h)subr, (h)addc


write me

add [sat] b32/b16 [CDST] DST SRC1 SRC2        O2=0, O1=0
sub [sat] b32/b16 [CDST] DST SRC1 SRC2        O2=0, O1=1
subr [sat] b32/b16 [CDST] DST SRC1 SRC2       O2=1, O1=0
addc [sat] b32/b16 [CDST] DST SRC1 SRC2 COND      O2=1, O1=1

All operands are 32-bit or 16-bit according to size specifier.

  b16/b32 s1, s2;
  bool c;
  switch (OP) {
      case add: s1 = SRC1, s2 = SRC2, c = 0; break;
      case sub: s1 = SRC1, s2 = ~SRC2, c = 1; break;
      case subr: s1 = ~SRC1, s2 = SRC2, c = 1; break;
      case addc: s1 = SRC1, s2 = SRC2, c = COND.C; break;
  }
  res = s1+s2+c;  // infinite precision
  CDST.C = res >> (b32 ? 32 : 16);
  res = res & (b32 ? 0xffffffff : 0xffff);
  CDST.O = (S(s1) == S(s2)) && (S(s1) != S(res));
  if (sat && CDST.O)
      if (S(res)) res = (b32 ? 0x7fffffff : 0x7fff);
      else res = (b32 ? 0x80000000 : 0x8000);
  CDST.S = S(res);
  CDST.Z = res == 0;
  DST = res;

Short/imm:    0x20000000 base opcode
      0x10000000 O2 bit
      0x00400000 O1 bit
      0x00008000 0: b16, 1: b32
      0x00000100 sat flag
      operands: S*DST, S*SRC1/S*SHARED, S*SRC2/S*CONST/IMM, $c0

Long:     0x20000000 0x00000000 base opcode
      0x10000000 0x00000000 O2 bit
      0x00400000 0x00000000 O1 bit
      0x00000000 0x04000000 0: b16, 1: b32
      0x00000000 0x08000000 sat flag

Multiplication: mul(24)


write me

mul [CDST] DST u16/s16 SRC1 u16/s16 SRC2

DST is 32-bit, SRC1 and SRC2 are 16-bit.

  b32 s1, s2;
  if (src1_signed)
      s1 = SEX(SRC1);
  else
      s1 = ZEX(SRC1);
  if (src2_signed)
      s2 = SEX(SRC2);
  else
      s2 = ZEX(SRC2);
  b32 res = s1*s2;    // modulo 2^32
  CDST.O = 0;
  CDST.C = 0;
  CDST.S = S(res);
  CDST.Z = res == 0;
  DST = res;

Short/imm:    0x40000000 base opcode
      0x00008000 src1 is signed
      0x00000100 src2 is signed

Long:     0x40000000 0x00000000 base opcode
      0x00000000 0x00008000 src1 is signed
      0x00000000 0x00004000 src2 is signed

mul [CDST] DST [high] u24/s24 SRC1 SRC2

All operands are 32-bit.

  b48 s1, s2;
  if (signed) {
      s1 = SEX((b24)SRC1);
      s2 = SEX((b24)SRC2);
  } else {
      s1 = ZEX((b24)SRC1);
      s2 = ZEX((b24)SRC2);
  }
  b48 m = s1*s2;  // modulo 2^48
  b32 res = (high ? m >> 16 : m & 0xffffffff);
  CDST.O = 0;
  CDST.C = 0;
  CDST.S = S(res);
  CDST.Z = res == 0;
  DST = res;

Short/imm:    0x40000000 base opcode
      0x00008000 src are signed
      0x00000100 high

Long:     0x40000000 0x00000000 base opcode
      0x00000000 0x00008000 src are signed
      0x00000000 0x00004000 high

Multiply-add: madd(24), msub(24), msubr(24), maddc(24)


write me

addop [CDST] DST mul u16 SRC1 SRC2 SRC3       O1=0 O2=000 S2=0 S1=0
addop [CDST] DST mul s16 SRC1 SRC2 SRC3       O1=0 O2=001 S2=0 S1=1
addop sat [CDST] DST mul s16 SRC1 SRC2 SRC3       O1=0 O2=010 S2=1 S1=0
addop [CDST] DST mul u24 SRC1 SRC2 SRC3       O1=0 O2=011 S2=1 S1=1
addop [CDST] DST mul s24 SRC1 SRC2 SRC3       O1=0 O2=100
addop sat [CDST] DST mul s24 SRC1 SRC2 SRC3       O1=0 O2=101
addop [CDST] DST mul high u24 SRC1 SRC2 SRC3  O1=0 O2=110
addop [CDST] DST mul high s24 SRC1 SRC2 SRC3  O1=0 O2=111
addop sat [CDST] DST mul high s24 SRC1 SRC2 SRC3  O1=1 O2=000

addop is one of:

add   O3=00   S4=0 S3=0
sub   O3=01   S4=0 S3=1
subr  O3=10   S4=1 S3=0
addc  O3=11   S4=1 S3=1

If addop is addc, insn also takes an additional COND parameter. DST and
SRC3 are always 32-bit, SRC1 and SRC2 are 16-bit for u16/s16 variants,
32-bit for u24/s24 variants. Only a few of the variants are encodable as
short/immediate, and they're restricted to DST=SRC3.

  if (u24 || s24) {
      b48 s1, s2;
      if (s24) {
          s1 = SEX((b24)SRC1);
          s2 = SEX((b24)SRC2);
      } else {
          s1 = ZEX((b24)SRC1);
          s2 = ZEX((b24)SRC2);
      }
      b48 m = s1*s2;  // modulo 2^48
      b32 mres = (high ? m >> 16 : m & 0xffffffff);
  } else {
      b32 s1, s2;
      if (s16) {
          s1 = SEX(SRC1);
          s2 = SEX(SRC2);
      } else {
          s1 = ZEX(SRC1);
          s2 = ZEX(SRC2);
      }
      b32 mres = s1*s2;   // modulo 2^32
  }
  b32 s1, s2;
  bool c;
  switch (OP) {
      case add: s1 = mres, s2 = SRC3, c = 0; break;
      case sub: s1 = mres, s2 = ~SRC3, c = 1; break;
      case subr: s1 = ~mres, s2 = SRC3, c = 1; break;
      case addc: s1 = mres, s2 = SRC3, c = COND.C; break;
  }
  res = s1+s2+c;  // infinite precision
  CDST.C = res >> 32;
  res = res & 0xffffffff;
  CDST.O = (S(s1) == S(s2)) && (S(s1) != S(res));
  if (sat && CDST.O)
      if (S(res)) res = 0x7fffffff;
      else res = 0x80000000;
  CDST.S = S(res);
  CDST.Z = res == 0;
  DST = res;

Short/imm:    0x60000000 base opcode
      0x00000100 S1
      0x00008000 S2
      0x00400000 S3
      0x10000000 S4
      operands: SDST, S*SRC/S*SHARED, S*SRC2/S*CONST/IMM, SDST, $c0

Long:     0x60000000 0x00000000 base opcode
      0x10000000 0x00000000 O1
      0x00000000 0xe0000000 O2
      0x00000000 0x0c000000 O3

Sum of absolute differences: sad, hsad


write me

sad [CDST] DST u16/s16/u32/s32 SRC1 SRC2 SRC3

Short variant is restricted to DST same as SRC3. All operands are 32-bit or
16-bit according to size specifier.

  int s1, s2; // infinite precision
  if (signed) {
      s1 = SEX(SRC1);
      s2 = SEX(SRC2);
  } else {
      s1 = ZEX(SRC1);
      s2 = ZEX(SRC2);
  }
  b32 mres = abs(s1-s2);  // modulo 2^32
  res = mres+SRC3;    // infinite precision
  CDST.C = res >> (b32 ? 32 : 16);
  res = res & (b32 ? 0xffffffff : 0xffff);
  CDST.O = (S(mres) == S(SRC3)) && (S(mres) != S(res));
  CDST.S = S(res);
  CDST.Z = res == 0;
  DST = res;

Short:    0x50000000 base opcode
      0x00008000 0: b16 1: b32
      0x00000100 src are signed

Long:     0x50000000 0x00000000 base opcode
      0x00000000 0x04000000 0: b16, 1: b32
      0x00000000 0x08000000 src are signed

Min/max selection: (h)min, (h)max


write me

min u16/u32/s16/s32 [CDST] DST SRC1 SRC2
max u16/u32/s16/s32 [CDST] DST SRC1 SRC2

All operands are 32-bit or 16-bit according to size specifier.

  if (SRC1 < SRC2) { // signed comparison for s16/s32, unsigned for u16/u32.
      res = (min ? SRC1 : SRC2);
  } else {
      res = (min ? SRC2 : SRC1);
  }
  CDST.O = 0;
  CDST.C = 0;
  CDST.S = S(res);
  CDST.Z = res == 0;
  DST = res;

Long:     0x30000000 0x80000000 base opcode
      0x00000000 0x20000000 0: max, 1: min
      0x00000000 0x08000000 0: u16/u32, 1: s16/s32
      0x00000000 0x04000000 0: b16, 1: b32
      operands: MCDST, LL*DST, L*SRC1/L*SHARED, L*SRC2/L*CONST2

Comparison: set, hset


write me

set [CDST] DST cond u16/s16/u32/s32 SRC1 SRC2

cond can be any subset of {l, g, e}.

All operands are 32-bit or 16-bit according to size specifier.

  int s1, s2; // infinite precision
  if (signed) {
      s1 = SEX(SRC1);
      s2 = SEX(SRC2);
  } else {
      s1 = ZEX(SRC1);
      s2 = ZEX(SRC2);
  }
  bool c;
  if (s1 < s2)
      c = cond.l;
  else if (s1 == s2)
      c = cond.e;
  else /* s1 > s2 */
      c = cond.g;
  if (c) {
      res = (b32?0xffffffff:0xffff);
  } else {
      res = 0;
  }
  CDST.O = 0;
  CDST.C = 0;
  CDST.S = S(res);
  CDST.Z = res == 0;
  DST = res;

Long:     0x30000000 0x60000000 base opcode
      0x00000000 0x08000000 0: u16/u32, 1: s16/s32
      0x00000000 0x04000000 0: b16, 1: b32
      0x00000000 0x00010000 cond.g
      0x00000000 0x00008000 cond.e
      0x00000000 0x00004000 cond.l
      operands: MCDST, LL*DST, L*SRC1/L*SHARED, L*SRC2/L*CONST2

Bitwise operations: (h)and, (h)or, (h)xor, (h)mov2


write me

and b32/b16 [CDST] DST [not] SRC1 [not] SRC2      O2=0, O1=0
or b32/b16 [CDST] DST [not] SRC1 [not] SRC2       O2=0, O1=1
xor b32/b16 [CDST] DST [not] SRC1 [not] SRC2      O2=1, O1=0
mov2 b32/b16 [CDST] DST [not] SRC1 [not] SRC2     O2=1, O1=1

Immediate forms only allow 32-bit operands, and cannot negate the second operand.

  s1 = (not1 ? ~SRC1 : SRC1);
  s2 = (not2 ? ~SRC2 : SRC2);
  switch (OP) {
      case and: res = s1 & s2; break;
      case or: res = s1 | s2; break;
      case xor: res = s1 ^ s2; break;
      case mov2: res = s2; break;
  }
  CDST.O = 0;
  CDST.C = 0;
  CDST.S = S(res);
  CDST.Z = res == 0;
  DST = res;

Imm:      0xd0000000 base opcode
      0x00400000 not1
      0x00008000 O2 bit
      0x00000100 O1 bit
      operands: SDST, SSRC/SSHARED, IMM
      assumed: not2=0 and b32.

Long:     0xd0000000 0x00000000 base opcode
      0x00000000 0x04000000 0: b16, 1: b32
      0x00000000 0x00020000 not2
      0x00000000 0x00010000 not1
      0x00000000 0x00008000 O2 bit
      0x00000000 0x00004000 O1 bit
      operands: MCDST, LL*DST, L*SRC1/L*SHARED, L*SRC2/L*CONST2

Bit shifts: (h)shl, (h)shr, (h)sar


write me

shl b16/b32 [CDST] DST SRC1 SRC2
shl b16/b32 [CDST] DST SRC1 SHCNT
shr u16/u32 [CDST] DST SRC1 SRC2
shr u16/u32 [CDST] DST SRC1 SHCNT
shr s16/s32 [CDST] DST SRC1 SRC2
shr s16/s32 [CDST] DST SRC1 SHCNT

  All operands are 16/32-bit according to the size specifier, except SHCNT.
  Shift counts are always treated as unsigned: passing a negative value to
  shl doesn't get you a shr.

  int size = (b32 ? 32 : 16);
  if (shl) {
      res = SRC1 << SRC2; // infinite precision, shift count doesn't wrap.
      if (SRC2 < size) { // yes, <. So if you shift 1 left by 32 bits, you DON'T get CDST.C set. but shift 2 left by 31 bits, and it gets set just fine.
          CDST.C = (res >> size) & 1; // basically, the bit that got shifted out.
      } else {
          CDST.C = 0;
      }
      res = res & (b32 ? 0xffffffff : 0xffff);
  } else {
      res = SRC1 >> SRC2; // infinite precision, shift count doesn't wrap.
      if (signed && S(SRC1)) {
          if (SRC2 < size)
              res |= (1<<size)-(1<<(size-SRC2)); // fill out the upper bits with 1's.
          else
              res |= (1<<size)-1;
      }
      if (SRC2 < size && SRC2 > 0) {
          CDST.C = (SRC1 >> (SRC2-1)) & 1;
      } else {
          CDST.C = 0;
      }
  }
  if (SRC2 == 1) {
      CDST.O = (S(SRC1) != S(res));
  } else {
      CDST.O = 0;
  }
  CDST.S = S(res);
  CDST.Z = res == 0;
  DST = res;

Long:     0x30000000 0xc0000000 base opcode
      0x00000000 0x20000000 0: shl, 1: shr
      0x00000000 0x08000000 0: u16/u32, 1: s16/s32 [shr only]
      0x00000000 0x04000000 0: b16, 1: b32
      0x00000000 0x00010000 0: use SRC2, 1: use SHCNT