Skip to content

Require specific array types for string ops #7810

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 16 additions & 24 deletions src/ir/child-typer.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,18 +103,6 @@ template<typename Subtype> struct ChildTyper : OverriddenVisitor<Subtype> {
self().noteAnyTupleType(childp, arity);
}

// Used only for string.new_lossy_utf8_array to work around a missing type
// annotation in the stringref spec.
void noteAnyI8ArrayReferenceType(Expression** childp) {
self().noteAnyI8ArrayReferenceType(childp);
}

// Used only for string.new_wtf16_array to work around a missing type
// annotation in the stringref spec.
void noteAnyI16ArrayReferenceType(Expression** childp) {
self().noteAnyI16ArrayReferenceType(childp);
}

Type getLabelType(Name label) { return self().getLabelType(label); }

bool skipUnreachable() { return false; }
Expand Down Expand Up @@ -1180,16 +1168,18 @@ template<typename Subtype> struct ChildTyper : OverriddenVisitor<Subtype> {

void visitStringNew(StringNew* curr) {
switch (curr->op) {
case StringNewLossyUTF8Array:
noteAnyI8ArrayReferenceType(&curr->ref);
case StringNewLossyUTF8Array: {
note(&curr->ref, Type(HeapTypes::getMutI8Array(), Nullable));
note(&curr->start, Type::i32);
note(&curr->end, Type::i32);
return;
case StringNewWTF16Array:
noteAnyI16ArrayReferenceType(&curr->ref);
}
case StringNewWTF16Array: {
note(&curr->ref, Type(HeapTypes::getMutI16Array(), Nullable));
note(&curr->start, Type::i32);
note(&curr->end, Type::i32);
return;
}
case StringNewFromCodePoint:
note(&curr->ref, Type::i32);
return;
Expand All @@ -1203,16 +1193,18 @@ template<typename Subtype> struct ChildTyper : OverriddenVisitor<Subtype> {
note(&curr->ref, Type(HeapType::string, Nullable));
}

void visitStringEncode(StringEncode* curr,
std::optional<HeapType> ht = std::nullopt) {
if (!ht) {
if (self().skipUnreachable() && !curr->array->type.isRef()) {
return;
void visitStringEncode(StringEncode* curr) {
note(&curr->str, Type(HeapType::string, Nullable));
switch (curr->op) {
case StringEncodeLossyUTF8Array: {
note(&curr->array, Type(HeapTypes::getMutI8Array(), Nullable));
break;
}
case StringEncodeWTF16Array: {
note(&curr->array, Type(HeapTypes::getMutI16Array(), Nullable));
break;
}
ht = curr->array->type.getHeapType();
}
note(&curr->str, Type(HeapType::string, Nullable));
note(&curr->array, Type(*ht, Nullable));
note(&curr->start, Type::i32);
}

Expand Down
20 changes: 10 additions & 10 deletions src/passes/Precompute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,19 +217,19 @@ class PrecomputingExpressionRunner
return Flow(NONCONSTANT_FLOW);
}

// string.encode_wtf16_array is effectively an Array read operation, so
// just like ArrayGet above we must check for immutability.
// string.new_wtf16_array is effectively an Array read operation, so
// we cannot optimize mutable arrays. Unfortunately, it is only valid with
// mutable arrays, so we cannot generally precompute it. As a special
// exception, we can precompute if the child is an array allocation because
// then we know the allocation will not escape anywhere else.
auto refType = curr->ref->type;
if (refType.isRef()) {
auto heapType = refType.getHeapType();
if (heapType.isArray()) {
if (heapType.getArray().element.mutable_ == Immutable) {
return Super::visitStringNew(curr);
}
}
if (refType.isRef() &&
(curr->ref->is<ArrayNew>() || curr->ref->is<ArrayNewData>() ||
curr->ref->is<ArrayNewFixed>())) {
return Super::visitStringNew(curr);
}

// Otherwise, this is mutable or unreachable or otherwise uninteresting.
// TODO: Handle more general cases as well.
return Flow(NONCONSTANT_FLOW);
}

Expand Down
20 changes: 2 additions & 18 deletions src/passes/StringLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@

#include "ir/module-utils.h"
#include "ir/names.h"
#include "ir/subtype-exprs.h"
#include "ir/type-updating.h"
#include "ir/utils.h"
#include "pass.h"
#include "passes/string-utils.h"
#include "support/string.h"
#include "wasm-builder.h"
#include "wasm-type.h"
#include "wasm.h"

namespace wasm {
Expand Down Expand Up @@ -283,7 +283,7 @@ struct StringLowering : public StringGathering {
}

// Common types used in imports.
Type nullArray16 = Type(Array(Field(Field::i16, Mutable)), Nullable);
Type nullArray16 = Type(HeapTypes::getMutI16Array(), Nullable);
Type nullExt = Type(HeapType::ext, Nullable);
Type nnExt = Type(HeapType::ext, NonNullable);

Expand Down Expand Up @@ -337,22 +337,6 @@ struct StringLowering : public StringGathering {
// Strings turn into externref.
updates[HeapType::string] = HeapType::ext;

// The module may have its own array16 type inside a big rec group, but
// imported strings expects that type in its own rec group as part of the
// ABI. Fix that up here. (This is valid to do as this type has no sub- or
// super-types anyhow; it is "plain old data" for communicating with the
// outside.)
auto allTypes = ModuleUtils::collectHeapTypes(*module);
auto array16 = nullArray16.getHeapType();
auto array16Element = array16.getArray().element;
for (auto type : allTypes) {
// Match an array type with no super and that is closed.
if (type.isArray() && !type.getDeclaredSuperType() && !type.isOpen() &&
type.getArray().element == array16Element) {
updates[type] = array16;
}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't we still need this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, because now it's no longer possible for any array operation to take a reference to an array type other than the one this code is introducing. Basically we're requiring that this replacement would be a no-op by construction.


TypeMapper(*module, updates).map();
}

Expand Down
10 changes: 2 additions & 8 deletions src/tools/fuzzing/fuzzing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3677,7 +3677,7 @@ Expression* TranslateToFuzzReader::makeCompoundRef(Type type) {
}

Expression* TranslateToFuzzReader::makeStringNewArray() {
auto* array = makeTrappingRefUse(getArrayTypeForString());
auto* array = makeTrappingRefUse(HeapTypes::getMutI16Array());
auto* start = make(Type::i32);
auto* end = make(Type::i32);
return builder.makeStringNew(StringNewWTF16Array, array, start, end);
Expand Down Expand Up @@ -5161,7 +5161,7 @@ Expression* TranslateToFuzzReader::makeStringEncode(Type type) {
assert(type == Type::i32);

auto* ref = makeTrappingRefUse(HeapType::string);
auto* array = makeTrappingRefUse(getArrayTypeForString());
auto* array = makeTrappingRefUse(HeapTypes::getMutI16Array());
auto* start = make(Type::i32);

// Only rarely emit without a bounds check, which might trap. See related
Expand Down Expand Up @@ -5625,12 +5625,6 @@ Type TranslateToFuzzReader::getSuperType(Type type) {
return superType;
}

HeapType TranslateToFuzzReader::getArrayTypeForString() {
// Emit an array that can be used with JS-style strings, containing 16-bit
// elements. For now, this must be a mutable type as that is all V8 accepts.
return HeapType(Array(Field(Field::PackedType::i16, Mutable)));
}

Name TranslateToFuzzReader::getTargetName(Expression* target) {
if (auto* block = target->dynCast<Block>()) {
return block->name;
Expand Down
5 changes: 5 additions & 0 deletions src/wasm-type.h
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,11 @@ constexpr HeapType nofunc = HeapType::nofunc;
constexpr HeapType nocont = HeapType::nocont;
constexpr HeapType noexn = HeapType::noexn;

// Certain heap types are used by standard operations. Provide central accessors
// for them to avoid having to build them everywhere they are used.
HeapType getMutI8Array();
HeapType getMutI16Array();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this be in ir/array*.h perhaps?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will eventually get more types, not all of which are arrays. In particular I'm thinking of (type (struct (field (mut i32)))), which we will need for waitqueueref.


} // namespace HeapTypes

// A recursion group consisting of one or more HeapTypes. HeapTypes with single
Expand Down
31 changes: 1 addition & 30 deletions src/wasm/wasm-ir-builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -518,24 +518,6 @@ struct IRBuilder::ChildPopper
if (!Type::isSubType(type, *bound)) {
return true;
}
} else if (constraint.isAnyI8ArrayReference()) {
bool isI8Array =
type.isRef() && type.getHeapType().isArray() &&
type.getHeapType().getArray().element.packedType == Field::i8;
bool isNone =
type.isRef() && type.getHeapType().isMaybeShared(HeapType::none);
if (!isI8Array && !isNone && type != Type::unreachable) {
return true;
}
} else if (constraint.isAnyI16ArrayReference()) {
bool isI16Array =
type.isRef() && type.getHeapType().isArray() &&
type.getHeapType().getArray().element.packedType == Field::i16;
bool isNone =
type.isRef() && type.getHeapType().isMaybeShared(HeapType::none);
if (!isI16Array && !isNone && type != Type::unreachable) {
return true;
}
} else {
WASM_UNREACHABLE("unexpected constraint");
}
Expand Down Expand Up @@ -686,13 +668,6 @@ struct IRBuilder::ChildPopper
return popConstrainedChildren(children);
}

Result<> visitStringEncode(StringEncode* curr,
std::optional<HeapType> ht = std::nullopt) {
std::vector<Child> children;
ConstraintCollector{builder, children}.visitStringEncode(curr, ht);
return popConstrainedChildren(children);
}

Result<> visitCallRef(CallRef* curr,
std::optional<HeapType> ht = std::nullopt) {
std::vector<Child> children;
Expand Down Expand Up @@ -2488,11 +2463,7 @@ Result<> IRBuilder::makeStringMeasure(StringMeasureOp op) {
Result<> IRBuilder::makeStringEncode(StringEncodeOp op) {
StringEncode curr;
curr.op = op;
// There's no type annotation on these instructions due to a bug in the
// stringref proposal, so we just fudge it and pass `array` instead of a
// defined heap type. This will allow us to pop a child with an invalid
// array type, but that's just too bad.
CHECK_ERR(ChildPopper{*this}.visitStringEncode(&curr, HeapType::array));
CHECK_ERR(visitStringEncode(&curr));
push(builder.makeStringEncode(op, curr.str, curr.array, curr.start));
return Ok{};
}
Expand Down
30 changes: 18 additions & 12 deletions src/wasm/wasm-type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2721,22 +2721,28 @@ void TypeBuilder::dump() {
}

std::unordered_set<HeapType> getIgnorablePublicTypes() {
auto array8 = Array(Field(Field::i8, Mutable));
auto array16 = Array(Field(Field::i16, Mutable));
TypeBuilder builder(2);
builder[0] = array8;
builder[1] = array16;
auto result = builder.build();
assert(result);
std::unordered_set<HeapType> ret;
for (auto type : *result) {
ret.insert(type);
}
return ret;
std::unordered_set<HeapType> set;
set.insert(HeapTypes::getMutI8Array());
set.insert(HeapTypes::getMutI16Array());
return set;
}

} // namespace wasm

namespace wasm::HeapTypes {

HeapType getMutI8Array() {
static HeapType i8Array = Array(Field(Field::i8, Mutable));
return i8Array;
}

HeapType getMutI16Array() {
static HeapType i16Array = Array(Field(Field::i16, Mutable));
return i16Array;
}

} // namespace wasm::HeapTypes

namespace std {

template<> class hash<wasm::Tuple> {
Expand Down
44 changes: 39 additions & 5 deletions src/wasm/wasm-validator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "ir/stack-utils.h"
#include "ir/utils.h"
#include "support/colors.h"
#include "wasm-type.h"
#include "wasm-validator.h"
#include "wasm.h"

Expand Down Expand Up @@ -3851,11 +3852,19 @@ void FunctionValidator::visitStringNew(StringNew* curr) {
refType.isRef(), curr, "string.new input must have array type")) {
return;
}
auto heapType = refType.getHeapType();
if (!shouldBeTrue(heapType.isBottom() || heapType.isArray(),
curr,
"string.new input must have array type")) {
return;
if (curr->op == StringNewLossyUTF8Array) {
shouldBeSubType(
refType,
Type(HeapTypes::getMutI8Array(), Nullable),
curr,
"string.new_lossy_utf8_array input must have proper i8 array type");
} else {
assert(curr->op == StringNewWTF16Array);
shouldBeSubType(
refType,
Type(HeapTypes::getMutI16Array(), Nullable),
curr,
"string.new_wtf16_array input must have proper i16 array type");
}
shouldBeEqualOrFirstIsUnreachable(curr->start->type,
Type(Type::i32),
Expand Down Expand Up @@ -3896,6 +3905,31 @@ void FunctionValidator::visitStringEncode(StringEncode* curr) {
shouldBeTrue(!getModule() || getModule()->features.hasStrings(),
curr,
"string operations require strings [--enable-strings]");
shouldBeSubTypeIgnoringShared(curr->str->type,
Type(HeapType::ext, Nullable),
curr,
"string.encode input should be an externref");
switch (curr->op) {
case StringEncodeLossyUTF8Array:
shouldBeSubType(
curr->array->type,
Type(HeapTypes::getMutI8Array(), Nullable),
curr,
"string.encode_lossy_utf8_array should have mutable i8 array");
break;
case StringEncodeWTF16Array: {
shouldBeSubType(
curr->array->type,
Type(HeapTypes::getMutI16Array(), Nullable),
curr,
"string.encode_wtf16_array should have mutable i16 array");
break;
}
}
shouldBeSubType(curr->start->type,
Type(Type::i32),
curr,
"string.encode start should be an i32");
}

void FunctionValidator::visitStringConcat(StringConcat* curr) {
Expand Down
Loading
Loading