Skip to content

Support varlen encoding for arbitrary int and uint numbers #312

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 19, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 39 additions & 23 deletions py/emitbc.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,30 +108,46 @@ STATIC void emit_write_byte_code_byte_byte(emit_t* emit, byte b1, uint b2) {
}

STATIC void emit_write_byte_code_uint(emit_t* emit, uint num) {
if (num <= 127) { // fits in 0x7f
// fit argument in single byte
byte* c = emit_get_cur_to_write_byte_code(emit, 1);
c[0] = num;
} else if (num <= 16383) { // fits in 0x3fff
// fit argument in two bytes
byte* c = emit_get_cur_to_write_byte_code(emit, 2);
c[0] = (num >> 8) | 0x80;
c[1] = num;
} else {
// larger numbers not implemented/supported
assert(0);
}
}

// integers (for small ints) are stored as 24 bits, in excess
// Each group of 7 bits is stored in a separate byte; the buffer is sized for the most bytes a word can need
byte buf[(BYTES_PER_WORD * 8 + 7) / 7];
byte *p = buf + sizeof(buf);
// We extract the 7-bit groups least-significant first, but store them in big-endian order to help decoding
do {
*--p = num & 0x7f;
num >>= 7;
} while (num != 0);
byte* c = emit_get_cur_to_write_byte_code(emit, buf + sizeof(buf) - p);
while (p != buf + sizeof(buf) - 1) {
*c++ = *p++ | 0x80;
}
*c = *p;
}

// Similar to emit_write_byte_code_uint(), just some extra handling to encode sign
STATIC void emit_write_byte_code_byte_int(emit_t* emit, byte b1, machine_int_t num) {
num += 0x800000;
assert(0 <= num && num <= 0xffffff);
byte* c = emit_get_cur_to_write_byte_code(emit, 4);
c[0] = b1;
c[1] = num;
c[2] = num >> 8;
c[3] = num >> 16;
emit_write_byte_code_byte(emit, b1);

// Each group of 7 bits is stored in a separate byte; the buffer is sized for the most bytes a word can need
byte buf[(BYTES_PER_WORD * 8 + 7) / 7];
byte *p = buf + sizeof(buf);
// We extract the 7-bit groups least-significant first, but store them in big-endian order to help decoding
do {
*--p = num & 0x7f;
num >>= 7;
} while (num != 0 && num != -1);
// Make sure that highest bit we stored (mask 0x40) matches sign
// of the number. If not, store extra byte just to encode sign
if (num == -1 && (*p & 0x40) == 0) {
*--p = 0x7f;
} else if (num == 0 && (*p & 0x40) != 0) {
*--p = 0;
}

byte* c = emit_get_cur_to_write_byte_code(emit, buf + sizeof(buf) - p);
while (p != buf + sizeof(buf) - 1) {
*c++ = *p++ | 0x80;
}
*c = *p;
}

STATIC void emit_write_byte_code_byte_uint(emit_t* emit, byte b, uint num) {
Expand Down
17 changes: 12 additions & 5 deletions py/showbc.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,18 @@ void mp_byte_code_print(const byte *ip, int len) {
printf("LOAD_CONST_ELLIPSIS");
break;

case MP_BC_LOAD_CONST_SMALL_INT:
unum = (ip[0] | (ip[1] << 8) | (ip[2] << 16)) - 0x800000;
ip += 3;
printf("LOAD_CONST_SMALL_INT %d", (int)unum);
break;
case MP_BC_LOAD_CONST_SMALL_INT: {
int num = 0;
if ((ip[0] & 0x40) != 0) {
// Number is negative
num--;
}
do {
num = (num << 7) | (*ip & 0x7f);
} while ((*ip++ & 0x80) != 0);
printf("LOAD_CONST_SMALL_INT %d", num);
break;
}

case MP_BC_LOAD_CONST_INT:
DECODE_QSTR;
Expand Down
29 changes: 23 additions & 6 deletions py/vm.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,20 @@ typedef enum {
UNWIND_JUMP,
} mp_unwind_reason_t;

#define DECODE_UINT do { unum = *ip++; if (unum > 127) { unum = ((unum & 0x3f) << 8) | (*ip++); } } while (0)
// Decode a variable-length unsigned integer starting at ip into unum.
// Each byte contributes its low 7 bits, most significant group first; a set
// top bit (0x80) means another byte follows. ip is left pointing just past
// the last byte consumed. Expects `unum` and `ip` to be in scope at the
// point of use.
// Wrapped in do { } while (0) so that "DECODE_UINT;" is exactly one
// statement and remains safe inside an unbraced if/else.
#define DECODE_UINT do { \
    unum = 0; \
    do { \
        unum = (unum << 7) + (*ip & 0x7f); \
    } while ((*ip++ & 0x80) != 0); \
} while (0)
#define DECODE_ULABEL do { unum = (ip[0] | (ip[1] << 8)); ip += 2; } while (0)
#define DECODE_SLABEL do { unum = (ip[0] | (ip[1] << 8)) - 0x8000; ip += 2; } while (0)
#define DECODE_QSTR do { qst = *ip++; if (qst > 127) { qst = ((qst & 0x3f) << 8) | (*ip++); } } while (0)
// Decode a variable-length qstr index starting at ip into qst.
// Each byte contributes its low 7 bits, most significant group first; a set
// top bit (0x80) means another byte follows. ip is left pointing just past
// the last byte consumed. Expects `qst` and `ip` to be in scope at the
// point of use.
// Wrapped in do { } while (0) so that "DECODE_QSTR;" is exactly one
// statement and remains safe inside an unbraced if/else.
#define DECODE_QSTR do { \
    qst = 0; \
    do { \
        qst = (qst << 7) + (*ip & 0x7f); \
    } while ((*ip++ & 0x80) != 0); \
} while (0)
#define PUSH(val) *++sp = (val)
#define POP() (*sp--)
#define TOP() (*sp)
Expand Down Expand Up @@ -146,11 +156,18 @@ mp_vm_return_kind_t mp_execute_byte_code_2(const byte *code_info, const byte **i
PUSH(mp_const_ellipsis);
break;

case MP_BC_LOAD_CONST_SMALL_INT:
unum = (ip[0] | (ip[1] << 8) | (ip[2] << 16)) - 0x800000;
ip += 3;
PUSH(MP_OBJ_NEW_SMALL_INT(unum));
case MP_BC_LOAD_CONST_SMALL_INT: {
int num = 0;
if ((ip[0] & 0x40) != 0) {
// Number is negative
num--;
}
do {
num = (num << 7) | (*ip & 0x7f);
} while ((*ip++ & 0x80) != 0);
PUSH(MP_OBJ_NEW_SMALL_INT(num));
break;
}

case MP_BC_LOAD_CONST_INT:
DECODE_QSTR;
Expand Down