[Date Prev][Date Next][Thread Prev][Thread Next]
[Date Index]
[Thread Index]
- Subject: LuaJIT vs. PyPy JIT
- From: François Perrad <francois.perrad@...>
- Date: Wed, 10 Feb 2010 10:05:08 +0100
After reading this post
http://indefinitestudies.org/2010/02/08/creating-a-toy-virtual-machine-with-pypy/
and the referenced paper
http://codespeak.net/pypy/extradoc/talk/Ficooolps2009/bolz-tracing-jit.pdf
.
I try to compare LuaJIT 2 and PyPy JIT, so I port the same toy
interpreter in Lua (toy.lua)
with the same application which computes the square of the accumulator.
And I write too a pure Python version without any PyPy stuff (toy.py)
and C version (toy.c).
I obtain the following results :
the compilation of target-toy-native needs 75s (size ~ 120kb).
the compilation of target-toy-jit needs 698s (size ~ 2Gb).
time python target-toy.py 1000000
real 41.777s
time target-toy-native 1000000
real 1.668s
time target-toy-jit 1000000
real 0.622s
time python toy.py 1000000
real 7.860s
time lua toy.lua 1000000
real 3.853s
time luajit toy.lua 1000000
real 0.355s
time ./toy 1000000
real 0.145s
real 0.068s (when compiled with gcc -O2)
These tests are done on Ubuntu 9.10 (x86) with :
- gcc 4.4.1 (Ubuntu package)
- lua 5.1.4 (Ubuntu package)
- luajit 2 head
- python 2.6.4 (Ubuntu package)
- pypy trunk
Lua is faster than Python, and LuaJIT is faster than PyPy/C output and
PyPy/JIT output.
In order to speed up toy.lua, Mike Pall propose to replace all those
pseudo-constants with real constants (see toy-mp.lua).
François
import os, sys
import autopath
import py
# these are the opcodes for the interpreted language
JUMP_IF_A = 1
MOV_A_R = 2
MOV_R_A = 3
ADD_R_TO_A = 4
DECR_A = 5
RETURN_A = 6
from pypy.rlib.jit import JitDriver
tlrjitdriver = JitDriver(greens = ['pc', 'bytecode'],
reds = ['a', 'regs'])
# the main interpreter loop
def interpret(bytecode, a):
regs = [0] * 256
pc = 0
while True:
tlrjitdriver.jit_merge_point(bytecode=bytecode, pc=pc, a=a, regs=regs)
opcode = bytecode[pc]
pc += 1
if opcode == JUMP_IF_A:
target = bytecode[pc]
pc += 1
if a:
if target<pc:
tlrjitdriver.can_enter_jit(bytecode=bytecode, pc=target, a=a, regs=regs)
pc = target
elif opcode == MOV_A_R:
n = bytecode[pc]
pc += 1
regs[n] = a
elif opcode == MOV_R_A:
n = bytecode[pc]
pc += 1
a = regs[n]
elif opcode == ADD_R_TO_A:
n = bytecode[pc]
pc += 1
a += regs[n]
elif opcode == DECR_A:
a -= 1
elif opcode == RETURN_A:
return a
# __________ Entry point __________
def entry_point(argv):
# the program we want to interpret
# it computes the square of its argument
bytecode = [
MOV_A_R, 0, # i = a
MOV_A_R, 1, # copy of 'a'
# 4:
MOV_R_A, 0, # i--
DECR_A,
MOV_A_R, 0,
MOV_R_A, 2, # res += a
ADD_R_TO_A, 1,
MOV_A_R, 2,
MOV_R_A, 0, # if i!=0: goto 4
JUMP_IF_A, 4,
MOV_R_A, 2,
RETURN_A
]
result = interpret(bytecode, int(argv[1]))
print result
return 0
def jitpolicy(driver):
from pypy.jit.metainterp.policy import JitPolicy
return JitPolicy()
# _____ Define and setup target ___
def target(*args):
return entry_point, None
# main function, if this script is called from the command line
if __name__ == '__main__':
entry_point(sys.argv)
import sys
# these are the opcodes for the interpreted language
JUMP_IF_A = 1
MOV_A_R = 2
MOV_R_A = 3
ADD_R_TO_A = 4
DECR_A = 5
RETURN_A = 6
# the main interpreter loop
def interpret(bytecode, a):
regs = [0] * 256
pc = 0
while True:
opcode = bytecode[pc]
pc += 1
if opcode == JUMP_IF_A:
target = bytecode[pc]
pc += 1
if a:
pc = target
elif opcode == MOV_A_R:
n = bytecode[pc]
pc += 1
regs[n] = a
elif opcode == MOV_R_A:
n = bytecode[pc]
pc += 1
a = regs[n]
elif opcode == ADD_R_TO_A:
n = bytecode[pc]
pc += 1
a += regs[n]
elif opcode == DECR_A:
a -= 1
elif opcode == RETURN_A:
return a
# __________ Entry point __________
def entry_point(argv):
# the program we want to interpret
# it computes the square of its argument
bytecode = [
MOV_A_R, 0, # i = a
MOV_A_R, 1, # copy of 'a'
# 4:
MOV_R_A, 0, # i--
DECR_A,
MOV_A_R, 0,
MOV_R_A, 2, # res += a
ADD_R_TO_A, 1,
MOV_A_R, 2,
MOV_R_A, 0, # if i!=0: goto 4
JUMP_IF_A, 4,
MOV_R_A, 2,
RETURN_A
]
result = interpret(bytecode, int(argv[1]))
print result
# main function, if this script is called from the command line
entry_point(sys.argv)
-- these are the opcodes for the interpreted language
local JUMP_IF_A = 1
local MOV_A_R = 2
local MOV_R_A = 3
local ADD_R_TO_A = 4
local DECR_A = 5
local RETURN_A = 6
-- the main interpreter loop
local function interpret (bytecode, a)
local regs = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
}
local pc = 1
while true do
local opcode = bytecode[pc]
pc = pc + 1
if opcode == JUMP_IF_A then
local target = bytecode[pc]
pc = pc + 1
if a ~= 0 then
pc = target
end
elseif opcode == MOV_A_R then
local n = bytecode[pc]
pc = pc + 1
regs[n] = a
elseif opcode == MOV_R_A then
local n = bytecode[pc]
pc = pc + 1
a = regs[n]
elseif opcode == ADD_R_TO_A then
local n = bytecode[pc]
pc = pc + 1
a = a + regs[n]
elseif opcode == DECR_A then
a = a - 1
elseif opcode == RETURN_A then
return a
end
end
end
-- __________ Entry point __________
local function entry_point(argv)
-- the program we want to interpret
-- it computes the square of its argument
bytecode = {
MOV_A_R, 1, -- i = a
MOV_A_R, 2, -- copy of 'a'
-- 5:
MOV_R_A, 1, -- i--
DECR_A,
MOV_A_R, 1,
MOV_R_A, 3, -- res += a
ADD_R_TO_A, 2,
MOV_A_R, 3,
MOV_R_A, 1, -- if i!=0: goto 5
JUMP_IF_A, 5,
MOV_R_A, 3,
RETURN_A
}
result = interpret(bytecode, tonumber(argv[1]))
print(result)
end
-- main function
entry_point(arg)
-- the main interpreter loop
local function interpret (bytecode, a)
local regs = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
}
local pc = 1
while true do
local opcode = bytecode[pc]
pc = pc + 1
if opcode == 'JUMP_IF_A' then
local target = bytecode[pc]
pc = pc + 1
if a ~= 0 then
pc = target
end
elseif opcode == 'MOV_A_R' then
local n = bytecode[pc]
pc = pc + 1
regs[n] = a
elseif opcode == 'MOV_R_A' then
local n = bytecode[pc]
pc = pc + 1
a = regs[n]
elseif opcode == 'ADD_R_TO_A' then
local n = bytecode[pc]
pc = pc + 1
a = a + regs[n]
elseif opcode == 'DECR_A' then
a = a - 1
elseif opcode == 'RETURN_A' then
return a
end
end
end
-- __________ Entry point __________
local function entry_point(argv)
-- the program we want to interpret
-- it computes the square of its argument
bytecode = {
'MOV_A_R', 1, -- i = a
'MOV_A_R', 2, -- copy of 'a'
-- 5:
'MOV_R_A', 1, -- i--
'DECR_A',
'MOV_A_R', 1,
'MOV_R_A', 3, -- res += a
'ADD_R_TO_A', 2,
'MOV_A_R', 3,
'MOV_R_A', 1, -- if i!=0: goto 5
'JUMP_IF_A', 5,
'MOV_R_A', 3,
'RETURN_A'
}
result = interpret(bytecode, tonumber(argv[1]))
print(result)
end
-- main function, if this script is called from the command line
entry_point(arg)
#include <stdio.h>
#include <stdlib.h>
// these are the opcodes for the interpreted language
#define JUMP_IF_A 1
#define MOV_A_R 2
#define MOV_R_A 3
#define ADD_R_TO_A 4
#define DECR_A 5
#define RETURN_A 6
typedef unsigned char opcode_t;
typedef long long register__t;
// the main interpreter loop
static register__t interpret(const opcode_t bytecode[], register__t a)
{
register__t regs[256];
unsigned pc = 0;
for (;;) {
opcode_t opcode = bytecode[pc];
pc += 1;
switch (opcode) {
case JUMP_IF_A: {
unsigned target = bytecode[pc];
pc += 1;
if (a)
pc = target;
break;
}
case MOV_A_R: {
opcode_t n = bytecode[pc];
pc += 1;
regs[n] = a;
break;
}
case MOV_R_A: {
opcode_t n = bytecode[pc];
pc += 1;
a = regs[n];
break;
}
case ADD_R_TO_A: {
opcode_t n = bytecode[pc];
pc += 1;
a += regs[n];
break;
}
case DECR_A: {
a -= 1;
break;
}
case RETURN_A: {
return a;
}
}
}
}
// __________ Entry point __________
static void entry_point(char *argv[])
{
// the program we want to interpret
// it computes the square of its argument
static const opcode_t bytecode[] = {
MOV_A_R, 0, // i = a
MOV_A_R, 1, // copy of 'a'
// 4:
MOV_R_A, 0, // i--
DECR_A,
MOV_A_R, 0,
MOV_R_A, 2, // res += a
ADD_R_TO_A, 1,
MOV_A_R, 2,
MOV_R_A, 0, // if i!=0: goto 4
JUMP_IF_A, 4,
MOV_R_A, 2,
RETURN_A
};
register__t result = interpret(bytecode, atoi(argv[1]));
printf("%lld\n", result);
}
int main(int argc, char* argv[])
{
entry_point(argv);
return EXIT_SUCCESS;
}