Skip to content

Commit 05005f6

Browse files
committed
py: Remove mp_obj_str_builder and use vstr instead.
With this patch, str/bytes construction is streamlined: always use a vstr to build a str/bytes object. If the size is known beforehand, use vstr_init_len to allocate only the required memory; otherwise, use vstr_init and the vstr will grow as needed. Then use mp_obj_new_str_from_vstr to create a str/bytes object using the vstr memory. Saves code ROM: 68 bytes on stmhal, 108 bytes on bare-arm, and 336 bytes on unix x64.
1 parent 0b9ee86 commit 05005f6

File tree

18 files changed

+130
-143
lines changed

18 files changed

+130
-143
lines changed

extmod/modubinascii.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,9 @@ STATIC mp_obj_t mod_binascii_hexlify(mp_uint_t n_args, const mp_obj_t *args) {
3939
mp_buffer_info_t bufinfo;
4040
mp_get_buffer_raise(args[0], &bufinfo, MP_BUFFER_READ);
4141

42-
byte *in = bufinfo.buf, *out;
43-
mp_obj_t o = mp_obj_str_builder_start(&mp_type_bytes, bufinfo.len * 2, &out);
42+
vstr_t vstr;
43+
vstr_init_len(&vstr, bufinfo.len * 2);
44+
byte *in = bufinfo.buf, *out = (byte*)vstr.buf;
4445
for (mp_uint_t i = bufinfo.len; i--;) {
4546
byte d = (*in >> 4);
4647
if (d > 9) {
@@ -53,7 +54,7 @@ STATIC mp_obj_t mod_binascii_hexlify(mp_uint_t n_args, const mp_obj_t *args) {
5354
}
5455
*out++ = d + '0';
5556
}
56-
return mp_obj_str_builder_end(o);
57+
return mp_obj_new_str_from_vstr(&mp_type_bytes, &vstr);
5758
}
5859
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mod_binascii_hexlify_obj, 1, 2, mod_binascii_hexlify);
5960

extmod/moduhashlib.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,10 @@ MP_DEFINE_CONST_FUN_OBJ_2(hash_update_obj, hash_update);
6363

6464
STATIC mp_obj_t hash_digest(mp_obj_t self_in) {
6565
mp_obj_hash_t *self = self_in;
66-
byte *hash;
67-
mp_obj_t o = mp_obj_str_builder_start(&mp_type_bytes, SHA256_BLOCK_SIZE, &hash);
68-
sha256_final((SHA256_CTX*)self->state, hash);
69-
return mp_obj_str_builder_end(o);
66+
vstr_t vstr;
67+
vstr_init_len(&vstr, SHA256_BLOCK_SIZE);
68+
sha256_final((SHA256_CTX*)self->state, (byte*)vstr.buf);
69+
return mp_obj_new_str_from_vstr(&mp_type_bytes, &vstr);
7070
}
7171
MP_DEFINE_CONST_FUN_OBJ_1(hash_digest_obj, hash_digest);
7272

py/compile.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2624,8 +2624,9 @@ STATIC void compile_atom_string(compiler_t *comp, mp_parse_node_struct_t *pns) {
26242624
}
26252625

26262626
// concatenate string/bytes
2627-
byte *s_dest;
2628-
mp_obj_t obj = mp_obj_str_builder_start(string_kind == MP_PARSE_NODE_STRING ? &mp_type_str : &mp_type_bytes, n_bytes, &s_dest);
2627+
vstr_t vstr;
2628+
vstr_init_len(&vstr, n_bytes);
2629+
byte *s_dest = (byte*)vstr.buf;
26292630
for (int i = 0; i < n; i++) {
26302631
if (MP_PARSE_NODE_IS_LEAF(pns->nodes[i])) {
26312632
mp_uint_t s_len;
@@ -2640,7 +2641,7 @@ STATIC void compile_atom_string(compiler_t *comp, mp_parse_node_struct_t *pns) {
26402641
}
26412642

26422643
// load the object
2643-
EMIT_ARG(load_const_obj, mp_obj_str_builder_end(obj));
2644+
EMIT_ARG(load_const_obj, mp_obj_new_str_from_vstr(string_kind == MP_PARSE_NODE_STRING ? &mp_type_str : &mp_type_bytes, &vstr));
26442645
}
26452646

26462647
// pns needs to have 2 nodes, first is lhs of comprehension, second is PN_comp_for node

py/misc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ typedef struct _vstr_t {
124124
#define VSTR_FIXED(vstr, alloc) vstr_t vstr; char vstr##_buf[(alloc)]; vstr_init_fixed_buf(&vstr, (alloc), vstr##_buf);
125125

126126
void vstr_init(vstr_t *vstr, size_t alloc);
127+
void vstr_init_len(vstr_t *vstr, size_t len);
127128
void vstr_init_fixed_buf(vstr_t *vstr, size_t alloc, char *buf);
128129
void vstr_clear(vstr_t *vstr);
129130
vstr_t *vstr_new(void);

py/modstruct.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,8 +161,9 @@ STATIC mp_obj_t struct_pack(mp_uint_t n_args, const mp_obj_t *args) {
161161
const char *fmt = mp_obj_str_get_str(args[0]);
162162
char fmt_type = get_fmt_type(&fmt);
163163
mp_int_t size = MP_OBJ_SMALL_INT_VALUE(struct_calcsize(args[0]));
164-
byte *p;
165-
mp_obj_t res = mp_obj_str_builder_start(&mp_type_bytes, size, &p);
164+
vstr_t vstr;
165+
vstr_init_len(&vstr, size);
166+
byte *p = (byte*)vstr.buf;
166167
memset(p, 0, size);
167168

168169
for (mp_uint_t i = 1; i < n_args; i++) {
@@ -190,7 +191,8 @@ STATIC mp_obj_t struct_pack(mp_uint_t n_args, const mp_obj_t *args) {
190191
mp_binary_set_val(fmt_type, *fmt++, args[i], &p);
191192
}
192193
}
193-
return res;
194+
195+
return mp_obj_new_str_from_vstr(&mp_type_bytes, &vstr);
194196
}
195197
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(struct_pack_obj, 1, MP_OBJ_FUN_ARGS_MAX, struct_pack);
196198

py/obj.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -500,9 +500,6 @@ mp_obj_t mp_alloc_emergency_exception_buf(mp_obj_t size_in);
500500
void mp_init_emergency_exception_buf(void);
501501

502502
// str
503-
mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, mp_uint_t len, byte **data);
504-
mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in);
505-
mp_obj_t mp_obj_str_builder_end_with_len(mp_obj_t o_in, mp_uint_t len);
506503
bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2);
507504
mp_uint_t mp_obj_str_get_hash(mp_obj_t self_in);
508505
mp_uint_t mp_obj_str_get_len(mp_obj_t self_in);

py/objint.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -340,8 +340,9 @@ STATIC mp_obj_t int_to_bytes(mp_uint_t n_args, const mp_obj_t *args) {
340340
mp_int_t val = mp_obj_int_get_checked(args[0]);
341341
mp_uint_t len = MP_OBJ_SMALL_INT_VALUE(args[1]);
342342

343-
byte *data;
344-
mp_obj_t o = mp_obj_str_builder_start(&mp_type_bytes, len, &data);
343+
vstr_t vstr;
344+
vstr_init_len(&vstr, len);
345+
byte *data = (byte*)vstr.buf;
345346
memset(data, 0, len);
346347

347348
if (MP_ENDIANNESS_LITTLE) {
@@ -353,7 +354,7 @@ STATIC mp_obj_t int_to_bytes(mp_uint_t n_args, const mp_obj_t *args) {
353354
}
354355
}
355356

356-
return mp_obj_str_builder_end(o);
357+
return mp_obj_new_str_from_vstr(&mp_type_bytes, &vstr);
357358
}
358359
STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_to_bytes_obj, 2, 4, int_to_bytes);
359360

py/objstr.c

Lines changed: 25 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -200,11 +200,10 @@ STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, mp_uint_t n_args, mp_uint_t n_k
200200

201201
if (MP_OBJ_IS_SMALL_INT(args[0])) {
202202
uint len = MP_OBJ_SMALL_INT_VALUE(args[0]);
203-
byte *data;
204-
205-
mp_obj_t o = mp_obj_str_builder_start(&mp_type_bytes, len, &data);
206-
memset(data, 0, len);
207-
return mp_obj_str_builder_end(o);
203+
vstr_t vstr;
204+
vstr_init_len(&vstr, len);
205+
memset(vstr.buf, 0, len);
206+
return mp_obj_new_str_from_vstr(&mp_type_bytes, &vstr);
208207
}
209208

210209
// check if argument has the buffer protocol
@@ -302,10 +301,10 @@ mp_obj_t mp_obj_str_binary_op(mp_uint_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
302301
return mp_const_empty_bytes;
303302
}
304303
}
305-
byte *data;
306-
mp_obj_t s = mp_obj_str_builder_start(lhs_type, lhs_len * n, &data);
307-
mp_seq_multiply(lhs_data, sizeof(*lhs_data), lhs_len, n, data);
308-
return mp_obj_str_builder_end(s);
304+
vstr_t vstr;
305+
vstr_init_len(&vstr, lhs_len * n);
306+
mp_seq_multiply(lhs_data, sizeof(*lhs_data), lhs_len, n, vstr.buf);
307+
return mp_obj_new_str_from_vstr(lhs_type, &vstr);
309308
}
310309

311310
// From now on all operations allow:
@@ -344,12 +343,11 @@ mp_obj_t mp_obj_str_binary_op(mp_uint_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
344343
switch (op) {
345344
case MP_BINARY_OP_ADD:
346345
case MP_BINARY_OP_INPLACE_ADD: {
347-
mp_uint_t alloc_len = lhs_len + rhs_len;
348-
byte *data;
349-
mp_obj_t s = mp_obj_str_builder_start(lhs_type, alloc_len, &data);
350-
memcpy(data, lhs_data, lhs_len);
351-
memcpy(data + lhs_len, rhs_data, rhs_len);
352-
return mp_obj_str_builder_end(s);
346+
vstr_t vstr;
347+
vstr_init_len(&vstr, lhs_len + rhs_len);
348+
memcpy(vstr.buf, lhs_data, lhs_len);
349+
memcpy(vstr.buf + lhs_len, rhs_data, rhs_len);
350+
return mp_obj_new_str_from_vstr(lhs_type, &vstr);
353351
}
354352

355353
case MP_BINARY_OP_IN:
@@ -441,8 +439,9 @@ STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
441439
}
442440

443441
// make joined string
444-
byte *data;
445-
mp_obj_t joined_str = mp_obj_str_builder_start(self_type, required_len, &data);
442+
vstr_t vstr;
443+
vstr_init_len(&vstr, required_len);
444+
byte *data = (byte*)vstr.buf;
446445
for (mp_uint_t i = 0; i < seq_len; i++) {
447446
if (i > 0) {
448447
memcpy(data, sep_str, sep_len);
@@ -454,7 +453,7 @@ STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
454453
}
455454

456455
// return joined string
457-
return mp_obj_str_builder_end(joined_str);
456+
return mp_obj_new_str_from_vstr(self_type, &vstr);
458457
}
459458

460459
#define is_ws(c) ((c) == ' ' || (c) == '\t')
@@ -1485,7 +1484,7 @@ STATIC mp_obj_t str_replace(mp_uint_t n_args, const mp_obj_t *args) {
14851484

14861485
// data for the replaced string
14871486
byte *data = NULL;
1488-
mp_obj_t replaced_str = MP_OBJ_NULL;
1487+
vstr_t vstr;
14891488

14901489
// do 2 passes over the string:
14911490
// first pass computes the required length of the replaced string
@@ -1537,7 +1536,8 @@ STATIC mp_obj_t str_replace(mp_uint_t n_args, const mp_obj_t *args) {
15371536
return args[0];
15381537
} else {
15391538
// substr found, allocate new string
1540-
replaced_str = mp_obj_str_builder_start(self_type, replaced_str_index, &data);
1539+
vstr_init_len(&vstr, replaced_str_index);
1540+
data = (byte*)vstr.buf;
15411541
assert(data != NULL);
15421542
}
15431543
} else {
@@ -1546,7 +1546,7 @@ STATIC mp_obj_t str_replace(mp_uint_t n_args, const mp_obj_t *args) {
15461546
}
15471547
}
15481548

1549-
return mp_obj_str_builder_end(replaced_str);
1549+
return mp_obj_new_str_from_vstr(self_type, &vstr);
15501550
}
15511551

15521552
STATIC mp_obj_t str_count(mp_uint_t n_args, const mp_obj_t *args) {
@@ -1643,13 +1643,13 @@ STATIC mp_obj_t str_rpartition(mp_obj_t self_in, mp_obj_t arg) {
16431643
// Supposedly not too critical operations, so optimize for code size
16441644
STATIC mp_obj_t str_caseconv(unichar (*op)(unichar), mp_obj_t self_in) {
16451645
GET_STR_DATA_LEN(self_in, self_data, self_len);
1646-
byte *data;
1647-
mp_obj_t s = mp_obj_str_builder_start(mp_obj_get_type(self_in), self_len, &data);
1646+
vstr_t vstr;
1647+
vstr_init_len(&vstr, self_len);
1648+
byte *data = (byte*)vstr.buf;
16481649
for (mp_uint_t i = 0; i < self_len; i++) {
16491650
*data++ = op(*self_data++);
16501651
}
1651-
*data = 0;
1652-
return mp_obj_str_builder_end(s);
1652+
return mp_obj_new_str_from_vstr(mp_obj_get_type(self_in), &vstr);
16531653
}
16541654

16551655
STATIC mp_obj_t str_lower(mp_obj_t self_in) {
@@ -1856,35 +1856,6 @@ const mp_obj_type_t mp_type_bytes = {
18561856
// the zero-length bytes
18571857
const mp_obj_str_t mp_const_empty_bytes_obj = {{&mp_type_bytes}, 0, 0, NULL};
18581858

1859-
mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, mp_uint_t len, byte **data) {
1860-
mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
1861-
o->base.type = type;
1862-
o->len = len;
1863-
o->hash = 0;
1864-
byte *p = m_new(byte, len + 1);
1865-
o->data = p;
1866-
*data = p;
1867-
return o;
1868-
}
1869-
1870-
mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) {
1871-
mp_obj_str_t *o = o_in;
1872-
o->hash = qstr_compute_hash(o->data, o->len);
1873-
byte *p = (byte*)o->data;
1874-
p[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
1875-
return o;
1876-
}
1877-
1878-
mp_obj_t mp_obj_str_builder_end_with_len(mp_obj_t o_in, mp_uint_t len) {
1879-
mp_obj_str_t *o = o_in;
1880-
o->data = m_renew(byte, (byte*)o->data, o->len + 1, len + 1);
1881-
o->len = len;
1882-
o->hash = qstr_compute_hash(o->data, o->len);
1883-
byte *p = (byte*)o->data;
1884-
p[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
1885-
return o;
1886-
}
1887-
18881859
mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, mp_uint_t len) {
18891860
mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
18901861
o->base.type = type;

py/stream.c

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -160,11 +160,12 @@ STATIC mp_obj_t stream_read(mp_uint_t n_args, const mp_obj_t *args) {
160160
}
161161
#endif
162162

163-
byte *buf;
164-
mp_obj_t ret_obj = mp_obj_str_builder_start(STREAM_CONTENT_TYPE(o->type->stream_p), sz, &buf);
163+
vstr_t vstr;
164+
vstr_init_len(&vstr, sz);
165165
int error;
166-
mp_uint_t out_sz = o->type->stream_p->read(o, buf, sz, &error);
166+
mp_uint_t out_sz = o->type->stream_p->read(o, vstr.buf, sz, &error);
167167
if (out_sz == MP_STREAM_ERROR) {
168+
vstr_clear(&vstr);
168169
if (is_nonblocking_error(error)) {
169170
// https://docs.python.org/3.4/library/io.html#io.RawIOBase.read
170171
// "If the object is in non-blocking mode and no bytes are available,
@@ -175,7 +176,9 @@ STATIC mp_obj_t stream_read(mp_uint_t n_args, const mp_obj_t *args) {
175176
}
176177
nlr_raise(mp_obj_new_exception_arg1(&mp_type_OSError, MP_OBJ_NEW_SMALL_INT(error)));
177178
} else {
178-
return mp_obj_str_builder_end_with_len(ret_obj, out_sz);
179+
vstr.len = out_sz;
180+
vstr.buf[vstr.len] = '\0';
181+
return mp_obj_new_str_from_vstr(STREAM_CONTENT_TYPE(o->type->stream_p), &vstr);
179182
}
180183
}
181184

@@ -252,7 +255,7 @@ STATIC mp_obj_t stream_readall(mp_obj_t self_in) {
252255
vstr_t vstr;
253256
vstr_init(&vstr, DEFAULT_BUFFER_SIZE);
254257
char *p = vstr.buf;
255-
mp_uint_t current_read = DEFAULT_BUFFER_SIZE;
258+
mp_uint_t current_read = DEFAULT_BUFFER_SIZE - 1; // save 1 byte for null termination
256259
while (true) {
257260
int error;
258261
mp_uint_t out_sz = o->type->stream_p->read(self_in, p, current_read, &error);
@@ -276,8 +279,8 @@ STATIC mp_obj_t stream_readall(mp_obj_t self_in) {
276279
current_read -= out_sz;
277280
p += out_sz;
278281
} else {
279-
current_read = DEFAULT_BUFFER_SIZE;
280-
p = vstr_extend(&vstr, current_read);
282+
p = vstr_extend(&vstr, DEFAULT_BUFFER_SIZE);
283+
current_read = DEFAULT_BUFFER_SIZE - 1; // save 1 byte for null termination
281284
if (p == NULL) {
282285
// TODO
283286
nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_OSError/*&mp_type_RuntimeError*/, "Out of memory"));
@@ -286,7 +289,7 @@ STATIC mp_obj_t stream_readall(mp_obj_t self_in) {
286289
}
287290

288291
vstr.len = total_size;
289-
vstr.buf[vstr.len] = '\0'; // XXX is there enough space?
292+
vstr.buf[vstr.len] = '\0';
290293
return mp_obj_new_str_from_vstr(STREAM_CONTENT_TYPE(o->type->stream_p), &vstr);
291294
}
292295

py/vstr.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,11 @@ void vstr_init(vstr_t *vstr, size_t alloc) {
5252
vstr->fixed_buf = false;
5353
}
5454

55+
void vstr_init_len(vstr_t *vstr, size_t len) {
56+
vstr_init(vstr, len + 1);
57+
vstr_add_len(vstr, len);
58+
}
59+
5560
void vstr_init_fixed_buf(vstr_t *vstr, size_t alloc, char *buf) {
5661
assert(alloc > 0); // need at least room for the null byte
5762
vstr->alloc = alloc;

0 commit comments

Comments
 (0)