lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


After reading this post
http://indefinitestudies.org/2010/02/08/creating-a-toy-virtual-machine-with-pypy/
and the referenced paper
http://codespeak.net/pypy/extradoc/talk/Ficooolps2009/bolz-tracing-jit.pdf
.

I try to compare LuaJIT 2 and PyPy JIT, so I port the same toy
interpreter in Lua (toy.lua)
with the same application which computes the square of the accumulator.
And I write too a pure Python version without any PyPy stuff (toy.py)
and C version (toy.c).

I obtain the following results :
the compilation of target-toy-native needs 75s (size ~ 120kb).
the compilation of target-toy-jit needs 698s (size ~ 2Gb).

time python target-toy.py 1000000
	real 41.777s
time target-toy-native 1000000
	real 1.668s
time target-toy-jit 1000000
	real 0.622s
time python toy.py 1000000
	real 7.860s
time lua toy.lua 1000000
	real 3.853s
time luajit toy.lua 1000000
	real 0.355s
time ./toy 1000000
	real 0.145s
	real 0.068s (when compiled with gcc -O2)

These tests are done on Ubuntu 9.10 (x86) with :
- gcc 4.4.1 (Ubuntu package)
- lua 5.1.4 (Ubuntu package)
- luajit 2 head
- python 2.6.4 (Ubuntu package)
- pypy trunk

Lua is faster than Python, and LuaJIT is faster than PyPy/C output and
PyPy/JIT output.

In order to speed up toy.lua, Mike Pall propose to replace all those
pseudo-constants with real constants (see toy-mp.lua).

François
import os, sys
import autopath
import py

# these are the opcodes for the interpreted language
JUMP_IF_A  = 1
MOV_A_R    = 2
MOV_R_A    = 3
ADD_R_TO_A = 4
DECR_A     = 5
RETURN_A   = 6

from pypy.rlib.jit import JitDriver
tlrjitdriver = JitDriver(greens = ['pc', 'bytecode'],
                         reds = ['a', 'regs'])

# the main interpreter loop
def interpret(bytecode, a):
   regs = [0] * 256
   pc = 0
   while True:
       tlrjitdriver.jit_merge_point(bytecode=bytecode, pc=pc, a=a, regs=regs)
       opcode = bytecode[pc]
       pc += 1
       if opcode == JUMP_IF_A:
           target = bytecode[pc]
           pc += 1
           if a:
               if target<pc:
                   tlrjitdriver.can_enter_jit(bytecode=bytecode, pc=target, a=a, regs=regs)
               pc = target
       elif opcode == MOV_A_R:
           n = bytecode[pc]
           pc += 1
           regs[n] = a
       elif opcode == MOV_R_A:
           n = bytecode[pc]
           pc += 1
           a = regs[n]
       elif opcode == ADD_R_TO_A:
           n = bytecode[pc]
           pc += 1
           a += regs[n]
       elif opcode == DECR_A:
           a -= 1
       elif opcode == RETURN_A:
           return a

# __________  Entry point  __________
def entry_point(argv):
    # the program we want to interpret
    # it computes the square of its argument
    bytecode = [
        MOV_A_R,    0, # i = a
        MOV_A_R,    1, # copy of 'a'
        # 4:
        MOV_R_A,    0, # i--
        DECR_A,
        MOV_A_R,    0,
        MOV_R_A,    2, # res += a
        ADD_R_TO_A, 1,
        MOV_A_R,    2,
        MOV_R_A,    0, # if i!=0: goto 4
        JUMP_IF_A,  4,
        MOV_R_A,    2,
        RETURN_A
    ]
    result = interpret(bytecode, int(argv[1]))
    print result
    return 0

def jitpolicy(driver):
    from pypy.jit.metainterp.policy import JitPolicy
    return JitPolicy()

# _____ Define and setup target ___
def target(*args):
    return entry_point, None

# main function, if this script is called from the command line
if __name__ == '__main__':
    entry_point(sys.argv)
import sys

# these are the opcodes for the interpreted language
JUMP_IF_A  = 1
MOV_A_R    = 2
MOV_R_A    = 3
ADD_R_TO_A = 4
DECR_A     = 5
RETURN_A   = 6

# the main interpreter loop
def interpret(bytecode, a):
    regs = [0] * 256
    pc = 0
    while True:
        opcode = bytecode[pc]
        pc += 1
        if opcode == JUMP_IF_A:
            target = bytecode[pc]
            pc += 1
            if a:
                pc = target
        elif opcode == MOV_A_R:
            n = bytecode[pc]
            pc += 1
            regs[n] = a
        elif opcode == MOV_R_A:
            n = bytecode[pc]
            pc += 1
            a = regs[n]
        elif opcode == ADD_R_TO_A:
            n = bytecode[pc]
            pc += 1
            a += regs[n]
        elif opcode == DECR_A:
            a -= 1
        elif opcode == RETURN_A:
            return a

# __________  Entry point  __________
def entry_point(argv):
    # the program we want to interpret
    # it computes the square of its argument
    bytecode = [
        MOV_A_R,    0, # i = a
        MOV_A_R,    1, # copy of 'a'
        # 4:
        MOV_R_A,    0, # i--
        DECR_A,
        MOV_A_R,    0,
        MOV_R_A,    2, # res += a
        ADD_R_TO_A, 1,
        MOV_A_R,    2,
        MOV_R_A,    0, # if i!=0: goto 4
        JUMP_IF_A,  4,
        MOV_R_A,    2,
        RETURN_A
    ]
    result = interpret(bytecode, int(argv[1]))
    print result

# main function, if this script is called from the command line
entry_point(sys.argv)
-- these are the opcodes for the interpreted language
local JUMP_IF_A  = 1
local MOV_A_R    = 2
local MOV_R_A    = 3
local ADD_R_TO_A = 4
local DECR_A     = 5
local RETURN_A   = 6

-- the main interpreter loop
local function interpret (bytecode, a)
    local regs = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    }
    local pc = 1
    while true do
        local opcode = bytecode[pc]
        pc = pc + 1
        if opcode == JUMP_IF_A then
            local target = bytecode[pc]
            pc = pc + 1
            if a ~= 0 then
                pc = target
            end
        elseif opcode == MOV_A_R then
            local n = bytecode[pc]
            pc = pc + 1
            regs[n] = a
        elseif opcode == MOV_R_A then
            local n = bytecode[pc]
            pc = pc + 1
            a = regs[n]
        elseif opcode == ADD_R_TO_A then
            local n = bytecode[pc]
            pc = pc + 1
            a = a + regs[n]
        elseif opcode == DECR_A then
            a = a - 1
        elseif opcode == RETURN_A then
            return a
        end
    end
end

-- __________  Entry point  __________
local function entry_point(argv)
    -- the program we want to interpret
    -- it computes the square of its argument
    bytecode = {
        MOV_A_R,    1, -- i = a
        MOV_A_R,    2, -- copy of 'a'
        -- 5:
        MOV_R_A,    1, -- i--
        DECR_A,
        MOV_A_R,    1,
        MOV_R_A,    3, -- res += a
        ADD_R_TO_A, 2,
        MOV_A_R,    3,
        MOV_R_A,    1, -- if i!=0: goto 5
        JUMP_IF_A,  5,
        MOV_R_A,    3,
        RETURN_A
    }
    result = interpret(bytecode, tonumber(argv[1]))
    print(result)
end

-- main function
entry_point(arg)

-- the main interpreter loop
local function interpret (bytecode, a)
    local regs = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    }
    local pc = 1
    while true do
        local opcode = bytecode[pc]
        pc = pc + 1
        if opcode == 'JUMP_IF_A' then
            local target = bytecode[pc]
            pc = pc + 1
            if a ~= 0 then
                pc = target
            end
        elseif opcode == 'MOV_A_R' then
            local n = bytecode[pc]
            pc = pc + 1
            regs[n] = a
        elseif opcode == 'MOV_R_A' then
            local n = bytecode[pc]
            pc = pc + 1
            a = regs[n]
        elseif opcode == 'ADD_R_TO_A' then
            local n = bytecode[pc]
            pc = pc + 1
            a = a + regs[n]
        elseif opcode == 'DECR_A' then
            a = a - 1
        elseif opcode == 'RETURN_A' then
            return a
        end
    end
end

-- __________  Entry point  __________
local function entry_point(argv)
    -- the program we want to interpret
    -- it computes the square of its argument
    bytecode = {
        'MOV_A_R',    1, -- i = a
        'MOV_A_R',    2, -- copy of 'a'
        -- 5:
        'MOV_R_A',    1, -- i--
        'DECR_A',
        'MOV_A_R',    1,
        'MOV_R_A',    3, -- res += a
        'ADD_R_TO_A', 2,
        'MOV_A_R',    3,
        'MOV_R_A',    1, -- if i!=0: goto 5
        'JUMP_IF_A',  5,
        'MOV_R_A',    3,
        'RETURN_A'
    }
    result = interpret(bytecode, tonumber(argv[1]))
    print(result)
end

-- main function, if this script is called from the command line
entry_point(arg)
#include <stdio.h>
#include <stdlib.h>

// these are the opcodes for the interpreted language
#define JUMP_IF_A      1
#define MOV_A_R        2
#define MOV_R_A        3
#define ADD_R_TO_A     4
#define DECR_A         5
#define RETURN_A       6

typedef unsigned char opcode_t;
typedef long long register__t;

// the main interpreter loop
static register__t interpret(const opcode_t bytecode[], register__t a)
{
    register__t regs[256];
    unsigned pc = 0;
    for (;;) {
        opcode_t opcode = bytecode[pc];
        pc += 1;
        switch (opcode) {
            case JUMP_IF_A: {
                unsigned target = bytecode[pc];
                pc += 1;
                if (a)
                    pc = target;
                break;
            }
            case MOV_A_R: {
                opcode_t n = bytecode[pc];
                pc += 1;
                regs[n] = a;
                break;
            }
            case MOV_R_A: {
                opcode_t n = bytecode[pc];
                pc += 1;
                a = regs[n];
                break;
            }
            case ADD_R_TO_A: {
                opcode_t n = bytecode[pc];
                pc += 1;
                a += regs[n];
                break;
            }
            case DECR_A: {
                a -= 1;
                break;
            }
            case RETURN_A: {
                return a;
            }
        }
    }
}

// __________  Entry point  __________
static void entry_point(char *argv[])
{
    // the program we want to interpret
    // it computes the square of its argument
    static const opcode_t bytecode[] = {
        MOV_A_R,    0, // i = a
        MOV_A_R,    1, // copy of 'a'
        // 4:
        MOV_R_A,    0, // i--
        DECR_A,
        MOV_A_R,    0,
        MOV_R_A,    2, // res += a
        ADD_R_TO_A, 1,
        MOV_A_R,    2,
        MOV_R_A,    0, // if i!=0: goto 4
        JUMP_IF_A,  4,
        MOV_R_A,    2,
        RETURN_A
    };
    register__t result = interpret(bytecode, atoi(argv[1]));
    printf("%lld\n", result);
}

int main(int argc, char* argv[])
{
    entry_point(argv);
    return EXIT_SUCCESS;
}