上一篇文章从整体上讨论了AssemblyScript(后文简称AS)程序如何被编译成WebAssembly(后文简称Wasm)模块,详细介绍了AS语言各种要素如何映射到Wasm二进制模块的各个段。这一篇文章将调整焦距,把焦点对准函数。我们将讨论AS编译器如何使用Wasm指令集来实现各种语法要素。在开始之前,我们先简单回顾一下Wasm指令集(关于Wasm模块和指令集的详细介绍可以参考之前的系列文章):
Wasm采用栈式虚拟机(Stack Based Virtual Machine)以及字节码(Bytecode),其指令可以分为五大类:
- 控制指令(Control Instructions),包括结构化控制指令、跳转指令、函数调用指令等。
- 参数指令(Parametric Instructions),只有两条:drop和select。
- 变量指令(Variable Instructions),包括局部变量指令和全局变量指令。
- 内存指令(Memory Instructions),包括存储指令、加载指令等。
- 数值指令(Numeric Instructions),包括常量指令、测试指令、比较指令、一元运算指令、二元运算指令、类型转换指令。
接下来将结合实例代码详细介绍AS编译器如何利用这五类指令。为了便于测试,后文给出的部分示例代码调用了外部函数。这些外部函数只是为了配合示例代码,因此它们的实现并不重要。下面统一给出这些外部函数的声明:
declare function printI32(n: i32): void;
declare function printI64(n: i64): void;
declare function printF32(n: f32): void;
declare function printF64(n: f64): void;
declare function randomI32(): i32;
控制指令
如前所述,Wasm控制指令包括结构化控制指令(block
、loop
、if-else
)、跳转指令(br
、br_if
、br_table
、return
)、函数调用指令(call
、call_indirect
),以及nop
和unreachable
。其中结构化控制指令和跳转指令配合可以实现AS语言的各种控制语句,例如if-else
语句、for
循环语句、switch-case
语句等。call
指令可以实现AS函数调用,call_indirect
指令则可以支持一等函数(First-class Function)。
AS语言的if-else
语句可以直接使用Wasm的if-else
指令实现,下面是一个例子:
export function printEven(n: i32): void {
if (n % 2 == 0) {
printI32(1);
} else {
printI32(0);
}
}
下面是编译结果(已经将编译后的函数字节码反编译为WAT,后文不再赘述):
(func $printEven (type 0) (param i32)
(if
(i32.rem_s (local.get 0) (i32.const 2))
(then (call $printI32 (i32.const 0)))
(else (call $printI32 (i32.const 1)))
)
)
上面例子也展示了call
指令的用法,后面就不再单独介绍了。顺便说一下,一些简单的if-else
语句会被AS编译器优化为select
指令,下面是一个例子:
export function max(a: i32, b: i32): i32 {
if (a > b) {
return a;
} else {
return b;
}
}
下面是编译结果:
(func $max (type 2) (param i32 i32) (result i32)
(select
(local.get 0)
(local.get 1)
(i32.gt_s (local.get 0) (local.get 1))
)
)
AS语言的for
、while
、do-while
等循环语句可以用Wasm的loop
指令实现。注意loop
指令并不能自动形成循环,所以必须要和br
、br_if
或br_table
跳转指令一起使用。下面来看一个稍微复杂一点的例子:
export function printNums(n: i32): void {
for (let i: i32 = 0; i < n; i++) {
printI32(i);
if (i == 100) {
break;
}
}
}
这个例子展示了loop
、block
、br
和br_if
指令的用法,下面是编译结果:
(func $printNums (type 0) (param i32)
(local i32)
(loop ;; label = @1
(if ;; label = @2
(i32.lt_s (local.get 1) (local.get 0))
(then
(block ;; label = @3
(call $printI32 (local.get 1))
(br_if 0 (;@3;)
(i32.eq (local.get 1) (i32.const 100)))
(local.set 1
(i32.add (local.get 1) (i32.const 1)))
(br 2 (;@1;))
) ;; end of block
) ;; end of then
) ;; end of if
) ;; end of loop
)
AS语言的switch-case
语句可以用Wasm的br_table
指令来实现,下面是一个例子:
export function mul100(n: i32): i32 {
switch (n) {
case 1: return 100;
case 2: return 200;
case 3: return 300;
default: return n * 100;
}
}
除了br_table
指令,这个例子还展示了return
指令的用法,下面是编译结果:
(func $mul100 (type 1) (param i32) (result i32)
(block ;; label = @1
(block ;; label = @2
(block ;; label = @3
(block ;; label = @4
(br_table 0 (;@4;) 1 (;@3;) 2 (;@2;) 3 (;@1;)
(i32.sub (local.get 0) (i32.const 1))))
(return (i32.const 100)))
(return (i32.const 200)))
(return (i32.const 300)))
(i32.mul (local.get 0) (i32.const 100))
)
AS语言里的一等函数,和C/C++等语言中函数指针概念比较类似,可以用call_indirect
指令实现。下面来看一个例子:
type OP = (a: i32, b: i32) => i32;
function add(a: i32, b: i32): i32 { return a + b; }
function sub(a: i32, b: i32): i32 { return a - b; }
function mul(a: i32, b: i32): i32 { return a * b; }
function div(a: i32, b: i32): i32 { return a / b; }
export function calc(a: i32, b: i32, op: i32): i32 {
return getOp(op)(a, b);
}
function getOp(op: i32): OP {
switch (op) {
case 1: return add;
case 2: return sub;
case 3: return mul;
case 4: return div;
default: return add;
}
}
下面是编译结果,请注意观察table和elem段的内容,以及calc()
函数字节码:
(module
(type (;0;) (func (param i32 i32) (result i32)))
(type (;1;) (func (param i32) (result i32)))
(type (;2;) (func (param i32 i32 i32) (result i32)))
(func $add (type 0) (i32.add (local.get 0) (local.get 1)))
(func $sub (type 0) (i32.sub (local.get 0) (local.get 1)))
(func $mul (type 0) (i32.mul (local.get 0) (local.get 1)))
(func $div (type 0) (i32.div_s (local.get 0) (local.get 1)))
(func $getOp (type 1) (param i32) (result i32) (;; 省略 ;;))
(func $calc (type 2) (param i32 i32 i32) (result i32)
(call_indirect (type 0)
(local.get 0)
(local.get 1)
(call $getOp (local.get 2))
)
)
(table (;0;) 5 funcref)
(memory (;0;) 0)
(export "memory" (memory 0))
(export "calc" (func $calc))
(export "getOp" (func $getOp))
(elem (;0;) (i32.const 1) func $add $sub $mul $div)
)
最后让我们来看看unreachable
指令。AS计划在Wasm异常处理提案通过后再支持异常处理,目前抛出异常会导致abort()函数被调用。我们可以通过添加编译器选项--use abort=
来禁用abort,这样编译器就会将abort()
函数调用替换为一条unreachable
指令。除此之外,我们也可以通过直接调用低级的unreachable()
函数来显式插入一条unreachable
指令,下面是一个例子:
export function crash2(): void {
unreachable();
}
编译结果也很简单:
(func $crash2 (type 1)
(unreachable)
)
参数指令
参数指令较为简单,只有drop
和select
两条。其中select
指令在前面介绍if-else
语句时已经提到过了,这里就不再单独介绍了。drop
指令可以将操作数栈顶多余的操作数弹出扔掉,下面来看一个简单的例子:
export function dropRandom(): void {
randomI32();
}
编译结果也很简单:
(func $dropRandom (type 0)
(drop (call $randomI32))
)
变量指令
局部变量指令共三条:local.get
、local.set
和local.tee
。如果不考虑优化,每个AS函数都可以被编译器编译成一个Wasm函数。函数参数和局部变量的读写操作可以通过局部变量指令来完成,下面来看一个例子:
export function addLocals(a: i32, b: i32): i32 {
let c: i32 = a + b;
return c;
}
下面是编译结果(为了便于观察结果,在编译部分示例代码时关闭了编译器优化,后文不再赘述):
(func $addLocals (type 1) (param i32 i32) (result i32)
(local i32)
(local.set 2 (i32.add (local.get 0) (local.get 1)))
(local.get 2)
)
全局变量指令只有两条:global.get
和global.set
。AS语言的全局变量可以直接用Wasm全局变量来实现,全局变量的读写操作可以通过全局变量指令来完成,下面来看一个例子:
let a: i32;
let b: i32;
let c: i32;
export function addGlobals(): void {
c = a + b;
}
下面是完整的编译结果:
(module
(type (;0;) (func))
(func $addGlobals (type 0)
(global.set 2 (i32.add (global.get 0) (global.get 1)))
)
(global (;0;) (mut i32) (i32.const 0))
(global (;1;) (mut i32) (i32.const 0))
(global (;2;) (mut i32) (i32.const 0))
(export "addGlobals" (func $addGlobals))
)
内存指令
Wasm虚拟机可以附带一块虚拟内存,并且提供了丰富的指令来操作这块内存。其中load系列指令可以从内存加载数据,放入操作数栈。store系列指令可以从操作数栈拿出数据,存入内存。此外,通过memory.size
指令可以获取内存的当前页数,通过memory.grow
指令可以按页扩展内存。我们将通过一个简单的结构体来帮助我们观察内存指令的使用,下面是这个结构体的定义:
class S {
a: i8; b: u8; c: i16; d: u16; e: i32; f: u32; g: i64; h: u64;
i: f32; j: f64;
}
下面这个函数展示了i32
类型load指令的用法:
export function loadI32(s: S): void {
printI32(s.a as i32); // i32.load8_s
printI32(s.b as i32); // i32.load8_u
printI32(s.c as i32); // i32.load16_s
printI32(s.d as i32); // i32.load16_u
printI32(s.e as i32); // i32.load
printI32(s.f as i32); // i32.load
}
下面是编译结果。通过load指令的offset立即数可以看出,AS编译器并没有对结构体字段进行重新排列,但是进行了适当的对齐。
(func $loadI32 (type 0) (param i32)
(call $printI32 (i32.load8_s (local.get 0)))
(call $printI32 (i32.load8_u offset=1 (local.get 0)))
(call $printI32 (i32.load16_s offset=2 (local.get 0)))
(call $printI32 (i32.load16_u offset=4 (local.get 0)))
(call $printI32 (i32.load offset=8 (local.get 0)))
(call $printI32 (i32.load offset=12 (local.get 0)))
)
下面这个函数展示了i64
类型load指令的用法:
export function loadI64(s: S): void {
printI64(s.a as i64); // i64.load8_s?
printI64(s.b as i64); // i64.load8_u?
printI64(s.c as i64); // i64.load16_s?
printI64(s.d as i64); // i64.load16_u?
printI64(s.e as i64); // i64.load32_s?
printI64(s.f as i64); // i64.load32_u?
printI64(s.g as i64); // i64.load
printI64(s.h as i64); // i64.load
}
下面是编译结果。可以看到,预期使用i64
类型load指令的地方,AS编译器使用了i32
类型load指令并通过extend指令进行整数拉升。
(func $loadI64 (type 0) (param i32)
(call $printI64 (i64.extend_i32_s (i32.load8_s (local.get 0))))
(call $printI64 (i64.extend_i32_u (i32.load8_u offset=1 (local.get 0))))
(call $printI64 (i64.extend_i32_s (i32.load16_s offset=2 (local.get 0))))
(call $printI64 (i64.extend_i32_u (i32.load16_u offset=4 (local.get 0))))
(call $printI64 (i64.extend_i32_s (i32.load offset=8 (local.get 0))))
(call $printI64 (i64.extend_i32_u (i32.load offset=12 (local.get 0))))
(call $printI64 (i64.load offset=16 (local.get 0)))
(call $printI64 (i64.load offset=24 (local.get 0)))
)
下面这个函数展示了float类型load指令的用法:
export function loadF(s: S): void {
printF32(s.i); // f32.load
printF64(s.j); // f64.load
}
下面是编译结果:
(func $loadF (type 0) (param i32)
(call $printF32 (f32.load offset=32 (local.get 0)))
(call $printF64 (f64.load offset=40 (local.get 0)))
)
相比load指令,store指令较为简单。下面的例子展示了store指令的用法:
export function store(s: S, v: i64): void {
s.a = v as i8; // i32.store8
s.b = v as u8; // i32.store8
s.c = v as i16; // i32.store16
s.d = v as u16; // i32.store16
s.e = v as i32; // i32.store
s.f = v as u32; // i32.store
s.g = v as i64; // i64.store
s.h = v as u64; // i64.store
s.i = v as f32; // f32.store
s.j = v as f64; // f64.store
}
下面是编译结果:
(func $store (type 1) (param i32 i64)
(i32.store8 (local.get 0) (i32.wrap_i64 (local.get 1)))
(i32.store8 offset=1 (local.get 0) (i32.wrap_i64 (local.get 1)))
(i32.store16 offset=2 (local.get 0) (i32.wrap_i64 (local.get 1)))
(i32.store16 offset=4 (local.get 0) (i32.wrap_i64 (local.get 1)))
(i32.store offset=8 (local.get 0) (i32.wrap_i64 (local.get 1)))
(i32.store offset=12 (local.get 0) (i32.wrap_i64 (local.get 1)))
(i64.store offset=16 (local.get 0) (local.get 1))
(i64.store offset=24 (local.get 0) (local.get 1))
(f32.store offset=32 (local.get 0) (f32.convert_i64_s (local.get 1)))
(f64.store offset=40 (local.get 0) (f64.convert_i64_s (local.get 1)))
)
和前面介绍过的unreachable
指令一样,memory.size
和memory.grow
指令也可以通过内置函数来生成,下面是一个简单的例子:
export function sizeAndGrow(n: i32): void {
printI32(memory.size());
printI32(memory.grow(n));
}
下面是编译结果:
(func $sizeAndGrow (type 0) (param i32)
(call $printI32 (memory.size))
(call $printI32 (memory.grow (local.get 0)))
)
数值指令
如前文所述,数值指令又可以分为常量指令、测试指令、比较指令、一元和二元运算指令,以及类型转换指令。其中常量指令共四条,AS语言里的数值字面量(Literals)可以用常量指令实现,下面是一个例子:
export function consts(): void {
printI32(1234); // i32.const
printI64(5678); // i64.const
printF32(3.14); // f32.const
printF64(2.71); // f64.const
}
下面是编译结果:
(func $consts (type 1)
(call $printI32 (i32.const 1234))
(call $printI64 (i64.const 5678))
(call $printF32 (f32.const 0x1.91eb86p+1 (;=3.14;)))
(call $printF64 (f64.const 0x1.5ae147ae147aep+1 (;=2.71;)))
)
测试指令只有两条:i32.eqz
和i64.eqz
。下面的例子展示了i32.eqz
指令的用法:
export function testOps(a: i32): void {
if (a == 0) { // i32.eqz
printI32(123);
}
}
下面是编译结果:
(func $testOps (type 0) (param i32)
(if (i32.eqz (local.get 0))
(then (call $printI32 (i32.const 123)))
)
)
AS语言支持的关系运算符可以用比较指令实现,下面的例子展示了i32
类型比较指令的用法:
export function relOps(a: i32, b: i32, c: u32, d: u32): void {
if (a == b) { printI32(0); } // i32.eq
if (a != b) { printI32(1); } // i32.ne
if (a < b) { printI32(2); } // i32.lt_s
if (c < d) { printI32(3); } // i32.lt_u
if (a > b) { printI32(4); } // i32.gt_s
if (c > d) { printI32(5); } // i32.gt_u
if (a <= b) { printI32(6); } // i32.le_s
if (c <= d) { printI32(7); } // i32.le_u
if (a >= b) { printI32(8); } // i32.ge_s
if (c >= d) { printI32(9); } // i32.ge_u
}
下面是编译结果:
(func $relOps (type 2) (param i32 i32 i32 i32)
(if (i32.eq (local.get 0) (local.get 1))
(then (call $printI32 (i32.const 0))))
(if (i32.ne (local.get 0) (local.get 1))
(then (call $printI32 (i32.const 1))))
(if (i32.lt_s (local.get 0) (local.get 1))
(then (call $printI32 (i32.const 2))))
(if (i32.lt_u (local.get 2) (local.get 3))
(then (call $printI32 (i32.const 3))))
(if (i32.gt_s (local.get 0) (local.get 1))
(then (call $printI32 (i32.const 4))))
(if (i32.gt_u (local.get 2) (local.get 3))
(then (call $printI32 (i32.const 5))))
(if (i32.le_s (local.get 0) (local.get 1))
(then (call $printI32 (i32.const 6))))
(if (i32.le_u (local.get 2) (local.get 3))
(then (call $printI32 (i32.const 7))))
(if (i32.ge_s (local.get 0) (local.get 1))
(then (call $printI32 (i32.const 8))))
(if (i32.ge_u (local.get 2) (local.get 3))
(then (call $printI32 (i32.const 9))))
)
除了浮点数取反运算以外,其他一元运算指令并没有直接被AS编译器使用,但是可以通过内置函数生成。下面的例子展示了i32
和f32
类型一元运算指令的用法:
export function unOps(a: i32, b: f32): void {
printI32(clz<i32>(a)); // i32.clz
printI32(ctz<i32>(a)); // i32.ctz
printI32(popcnt<i32>(a)); // i32.popcnt
printF32(abs<f32>(b)); // f32.abs
printF32(-b); // f32.neg
printF32(sqrt<f32>(b)); // f32.sqrt
printF32(floor<f32>(b)); // f32.floor
printF32(trunc<f32>(b)); // f32.trunc
printF32(nearest<f32>(b)); // f32.nearest
}
下面是编译结果:
(func $unOps (type 3) (param i32 f32)
(call $printI32 (i32.clz (local.get 0)))
(call $printI32 (i32.ctz (local.get 0)))
(call $printI32 (i32.popcnt (local.get 0)))
(call $printF32 (f32.abs (local.get 1)))
(call $printF32 (f32.neg (local.get 1)))
(call $printF32 (f32.sqrt (local.get 1)))
(call $printF32 (f32.floor (local.get 1)))
(call $printF32 (f32.trunc (local.get 1)))
(call $printF32 (f32.nearest (local.get 1)))
)
AS语言支持的二元运算符可以用二元运算指令实现,下面的例子展示了i32
类型二元运算指令的用法:
export function binOps(a: i32, b: i32, c: u32, d: u32, e: f32, f: f32): void {
printI32(a + b); // i32.add
printI32(a - b); // i32.sub
printI32(a * b); // i32.mul
printI32(a / b); // i32.div_s
printI32(c / d); // i32.div_u
printI32(a % b); // i32.rem_s
printI32(c % d); // i32.rem_u
printI32(a & b); // i32.and
printI32(a | b); // i32.or
printI32(a ^ b); // i32.xor
printI32(a << b); // i32.shl
printI32(a >> b); // i32.shr_s
printI32(a >>> b); // i32.shr_u
printI32(rotl<i32>(a, b)); // i32.rotl
printI32(rotr<i32>(a, b)); // i32.rotr
}
由于AS语言没有“循环位移”运算符,所以我们只能通过内置函数来生成循环位移指令。下面是编译结果:
(func $binOps (type 3) (param i32 i32 i32 i32 f32 f32)
(call $printI32 (i32.add (local.get 0) (local.get 1)))
(call $printI32 (i32.sub (local.get 0) (local.get 1)))
(call $printI32 (i32.mul (local.get 0) (local.get 1)))
(call $printI32 (i32.div_s (local.get 0) (local.get 1)))
(call $printI32 (i32.div_u (local.get 2) (local.get 3)))
(call $printI32 (i32.rem_s (local.get 0) (local.get 1)))
(call $printI32 (i32.rem_u (local.get 2) (local.get 3)))
(call $printI32 (i32.and (local.get 0) (local.get 1)))
(call $printI32 (i32.or (local.get 0) (local.get 1)))
(call $printI32 (i32.xor (local.get 0) (local.get 1)))
(call $printI32 (i32.shl (local.get 0) (local.get 1)))
(call $printI32 (i32.shr_s (local.get 0) (local.get 1)))
(call $printI32 (i32.shr_u (local.get 0) (local.get 1)))
(call $printI32 (i32.rotl (local.get 0) (local.get 1)))
(call $printI32 (i32.rotr (local.get 0) (local.get 1)))
)
AS语言中的类型转换操作可以通过类型转换指令实现,下面是一个例子:
export function cvtOps(a: i32, b: i64, c: u32, d: u64, e: f32, f: f64): void {
printI32(b as i32); // i32.wrap_i64
printI32(e as i32); // i32.trunc_f32_s
printI32(e as u32); // i32.trunc_f32_u
printI32(f as i32); // i32.trunc_f64_s
printI32(f as u32); // i32.trunc_f64_u
printI64(a); // i64.extend_i32_s
printI64(a as u32); // i64.extend_i32_u
printI64(e as i64); // i64.trunc_f32_s
printI64(e as u64); // i64.trunc_f32_u
printI64(f as i64); // i64.trunc_f64_s
printI64(f as u64); // i64.trunc_f64_u
printF32(a as f32); // f32.convert_i32_s
printF32(c as f32); // f32.convert_i32_u
printF32(b as f32); // f32.convert_i64_s
printF32(d as f32); // f32.convert_i64_u
printF32(f as f32); // f32.demote_f64
printF64(a as f64); // f64.convert_i32_s
printF64(c as f64); // f64.convert_i32_u
printF64(b as f64); // f64.convert_i64_s
printF64(d as f64); // f64.convert_i64_u
printF64(e); // f64.promote_f32
printI32(reinterpret<i32>(e)); // i32.reinterpret_f32
printI64(reinterpret<i64>(f)); // i64.reinterpret_f64
printF32(reinterpret<f32>(a)); // f32.reinterpret_i32
printF64(reinterpret<f64>(b)); // f64.reinterpret_i64
}
下面是编译结果:
(func $cvtOps (type 4) (param i32 i64 i32 i64 f32 f64)
(call $printI32 (i32.wrap_i64 (local.get 1)))
(call $printI32 (i32.trunc_f32_s (local.get 4)))
(call $printI32 (i32.trunc_f32_u (local.get 4)))
(call $printI32 (i32.trunc_f64_s (local.get 5)))
(call $printI32 (i32.trunc_f64_u (local.get 5)))
(call $printI64 (i64.extend_i32_s (local.get 0)))
(call $printI64 (i64.extend_i32_u (local.get 0)))
(call $printI64 (i64.trunc_f32_s (local.get 4)))
(call $printI64 (i64.trunc_f32_u (local.get 4)))
(call $printI64 (i64.trunc_f64_s (local.get 5)))
(call $printI64 (i64.trunc_f64_u (local.get 5)))
(call $printF32 (f32.convert_i32_s (local.get 0)))
(call $printF32 (f32.convert_i32_u (local.get 2)))
(call $printF32 (f32.convert_i64_s (local.get 1)))
(call $printF32 (f32.convert_i64_u (local.get 3)))
(call $printF32 (f32.demote_f64 (local.get 5)))
(call $printF64 (f64.convert_i32_s (local.get 0)))
(call $printF64 (f64.convert_i32_u (local.get 2)))
(call $printF64 (f64.convert_i64_s (local.get 1)))
(call $printF64 (f64.convert_i64_u (local.get 3)))
(call $printF64 (f64.promote_f32 (local.get 4)))
(call $printI32 (i32.reinterpret_f32 (local.get 4)))
(call $printI64 (i64.reinterpret_f64 (local.get 5)))
(call $printF32 (f32.reinterpret_i32 (local.get 0)))
(call $printF64 (f64.reinterpret_i64 (local.get 1)))
)
总结
本文讨论了AS编译器如何通过各种Wasm指令来实现AS语法要素,简单来说:各种控制结构通过控制指令来实现、局部变量和全局变量的读写通过变量指令来实现、内存操作通过内存指令来实现、各种运算符和类型转换通过数值指令来实现。在后面的文章中,我们还将深入讨论AS如何实现面向对象编程和自动内存管理。
*本文由CoinEx Chain开发团队成员Chase撰写。CoinEx Chain是全球首条基于Tendermint共识协议和Cosmos SDK开发的DEX专用公链,借助IBC,以DEX公链、智能合约链、隐私链三条链合一的方式,去解决可扩展性(Scalability)、去中心化(Decentralization)、安全性(Security)这一区块链“不可能三角”的问题,能够高性能地支持数字资产的交易以及基于智能合约的DeFi应用。