brainfucked.py again. Or: How not to optimise
After slaving over a warm laptop (while getting lost on the train), I rewrote my bf interpreter to use a kind of pseudo-bytecode to optimise jumps and long runs of increment and movement operations. Below is the new code.
#!/usr/bin/python
"""
brainfucked.py a simple (and hopefully easy to understand)
Brainfuck interpreter written in Python.
Official page: http://www.muppetlabs.com/~breadbox/bf/
Wikipedia: http://en.wikipedia.org/wiki/Brainfuck
Program archive: .
Copyright (C) 2007 Matthew Davey
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
"""
from time import time
import sys
import re
import os
class BytecodeInstruction:
    """
    One instruction of the interpreter's pseudo-bytecode.

    opcode -- one of the OP_* constants defined on this class
    value  -- the operand; its meaning depends on the opcode:
              OP_MOV         signed number of cells to move the pointer
              OP_INC         signed amount to add to the current cell
              OP_LOOP_START  bytecode index of the matching OP_LOOP_END
              OP_LOOP_END    bytecode index of the matching OP_LOOP_START
              OP_INPUT       unused
              OP_OUTPUT      unused
    """

    OP_MOV = 1
    OP_INC = 2
    OP_LOOP_START = 3
    OP_LOOP_END = 4
    OP_INPUT = 5
    OP_OUTPUT = 6

    def __init__(self, opcode, value):
        self.opcode = opcode
        self.value = value

    def __str__(self):
        """Return a human readable form of the instruction (used for debugging)."""
        if self.opcode == self.OP_MOV:
            return "MOVE %d" % (self.value,)
        if self.opcode == self.OP_INC:
            return "INCREMENT %d" % (self.value,)
        if self.opcode == self.OP_LOOP_START:
            return "OPEN LOOP (END: %d)" % (self.value,)
        if self.opcode == self.OP_LOOP_END:
            return "CLOSE LOOP (START: %d)" % (self.value,)
        if self.opcode == self.OP_INPUT:
            return "INPUT"
        if self.opcode == self.OP_OUTPUT:
            return "OUTPUT"
        # __str__ must always return a string; falling off the end would
        # return None and make str() raise a TypeError
        return "UNKNOWN OPCODE %r (VALUE: %r)" % (self.opcode, self.value)
class BytecodeException(Exception):
    """Raised when a program cannot be compiled, e.g. on unmatched brackets."""
class BrainfuckedInterpreter:
    """Compiles Brainfuck source into pseudo-bytecode and executes it."""

    # Cells wrap modulo this value. Classic Brainfuck uses 8-bit cells, and
    # keeping every cell in 0..255 guarantees chr() can always output it.
    CELL_SIZE = 256

    def compile_to_bytecode(self, program):
        """
        Turn the passed brainfuck program into our pretend bytecode

        Runs of '+'/'-' collapse into a single OP_INC and runs of '>'/'<'
        into a single OP_MOV. Each loop instruction stores the bytecode
        index of its partner so the interpreter can jump directly.

        Returns a list of BytecodeInstruction. The list always starts with
        a priming (OP_MOV, 0) instruction; loop targets count it.

        Raises BytecodeException if the brackets do not match up.

        TODO: Check for NO_OPs (OP_MOV, 0) or (OP_INC, 0)
        TODO: Try to find some patterns that can be expressed simplier
        """
        # Remove all non-instructions. The '-' sits last in the character
        # class so it is a literal and not the middle of a range.
        program = re.findall(r"[\[\]<>+.,-]", program)

        # Holds the generated bytecode. It is primed with a no-op move to
        # simplify the "what was the last instruction" checks below.
        # NOTE: the priming instruction is left in place; removing it would
        # shift every loop target by one.
        bytecode = [BytecodeInstruction(BytecodeInstruction.OP_MOV, 0)]

        # A stack for open brackets. Holds bytecode locations, not
        # positions in the program text
        bracket_stack = []

        for instruction in program:
            if instruction in ('+', '-'):
                # Fold into the previous OP_INC, or start a new one at 0
                if bytecode[-1].opcode != BytecodeInstruction.OP_INC:
                    bytecode.append(BytecodeInstruction(BytecodeInstruction.OP_INC, 0))
                if instruction == '+':
                    bytecode[-1].value += 1
                else:
                    bytecode[-1].value -= 1
            elif instruction in ('>', '<'):
                # Same again: fold into the previous OP_MOV if there is one
                if bytecode[-1].opcode != BytecodeInstruction.OP_MOV:
                    bytecode.append(BytecodeInstruction(BytecodeInstruction.OP_MOV, 0))
                if instruction == '>':
                    bytecode[-1].value += 1
                else:
                    bytecode[-1].value -= 1
            elif instruction == ',':
                bytecode.append(BytecodeInstruction(BytecodeInstruction.OP_INPUT, False))
            elif instruction == '.':
                bytecode.append(BytecodeInstruction(BytecodeInstruction.OP_OUTPUT, False))
            elif instruction == '[':
                # We don't know where to jump to yet, so just store False
                bytecode.append(BytecodeInstruction(BytecodeInstruction.OP_LOOP_START, False))
                # Remember where this bracket lives so the matching ']'
                # can find it
                bracket_stack.append(len(bytecode) - 1)
            elif instruction == ']':
                # The location (bytecode, not program) of the open bracket
                try:
                    bracket_location = bracket_stack.pop()
                except IndexError:
                    raise BytecodeException("Unmatched ']' encoutered")
                # The closing bracket points back at the opening bracket...
                bytecode.append(BytecodeInstruction(BytecodeInstruction.OP_LOOP_END, bracket_location))
                # ...and the opening bracket points forward at the closing
                # one, which is where it jumps when the cell is 0
                bytecode[bracket_location].value = len(bytecode) - 1

        # Make sure all the brackets have been matched
        if bracket_stack:
            raise BytecodeException("Unmatched '[' encoutered")
        return bytecode

    def run_bytecode(self, bytecode, use_stdout = False, debug = False):
        """
        Execute bytecode produced by compile_to_bytecode.

        bytecode   -- list of BytecodeInstruction
        use_stdout -- when True, also write each output character to
                      sys.stdout as it is produced
        debug      -- when True, print every instruction before it runs
                      and the whole tape after it

        Input is read one character at a time from sys.stdin; end of input
        stores 0 in the current cell. Cell values wrap modulo CELL_SIZE.

        Returns everything the program output, as a single string.

        Raises Exception if the program moves left of cell 0.
        """
        # Program output, collected in pieces and joined at the end
        output = []
        # Our Tape/Memory; it grows to the right on demand
        memory = [0]
        memory_pointer = 0
        cell_size = self.CELL_SIZE
        # How we are doing processing the bytecode
        bytecode_length = len(bytecode)
        bytecode_pointer = 0

        while bytecode_pointer < bytecode_length:
            instruction = bytecode[bytecode_pointer]
            opcode = instruction.opcode
            if debug:
                print(instruction)

            if opcode == BytecodeInstruction.OP_INC:
                memory[memory_pointer] = (memory[memory_pointer] + instruction.value) % cell_size
            elif opcode == BytecodeInstruction.OP_MOV:
                target = memory_pointer + instruction.value
                # Can't move before cell: 0
                if target < 0:
                    raise Exception("Tried to move before the start of the memory block")
                # Going past the end of the tape? Grow it by exactly the
                # number of cells that are missing
                shortfall = target - (len(memory) - 1)
                if shortfall > 0:
                    memory.extend([0] * shortfall)
                memory_pointer = target
            elif opcode == BytecodeInstruction.OP_LOOP_START:
                # Jump to the matching OP_LOOP_END; the increment at the
                # bottom of this loop then steps past it
                if memory[memory_pointer] == 0:
                    bytecode_pointer = instruction.value
            elif opcode == BytecodeInstruction.OP_LOOP_END:
                # Jump back to the matching OP_LOOP_START; the increment at
                # the bottom then lands on the first instruction of the body
                if memory[memory_pointer] != 0:
                    bytecode_pointer = instruction.value
            elif opcode == BytecodeInstruction.OP_INPUT:
                char = sys.stdin.read(1)
                if char == '':
                    # End of input
                    memory[memory_pointer] = 0
                else:
                    # Remember to turn character 'A' into its character code
                    memory[memory_pointer] = ord(char) % cell_size
            elif opcode == BytecodeInstruction.OP_OUTPUT:
                char = chr(memory[memory_pointer])
                output.append(char)
                if use_stdout:
                    sys.stdout.write(char)

            if debug:
                print(memory)
            bytecode_pointer += 1

        return ''.join(output)
# Command line entry point:
#   brainfucked.py <filename> [enable_timer: True | False]
# Runs the given Brainfuck file, writing its output to stdout. With the
# timer enabled, total and compile times are printed once the program ends.
if __name__ == '__main__':
    usage = "Usage: %s <filename> [enable_timer: True | False]" % (sys.argv[0],)
    timer = False

    if len(sys.argv) not in (2, 3) or sys.argv[1] == 'help':
        print(usage)
        sys.exit(1)

    if not os.path.exists(sys.argv[1]):
        print(usage)
        print("File not found")
        sys.exit(1)

    try:
        # 'with' makes sure the file is closed again, even if read() fails
        with open(sys.argv[1]) as program_file:
            program = program_file.read()
    except Exception as e:
        # Deliberately broad: any failure to read the program is reported
        # the same way at this top-level boundary
        print(usage)
        print("Unable to open file: %s" % (e,))
        sys.exit(1)

    if len(sys.argv) == 3:
        if sys.argv[2] not in ('True', 'False'):
            print(usage)
            print("Second argument must be 'True' or 'False'")
            sys.exit(1)
        if sys.argv[2] == 'True':
            timer = True

    if timer:
        start_time = time()

    bf = BrainfuckedInterpreter()
    try:
        bytecode = bf.compile_to_bytecode(program)
    except BytecodeException as e:
        # Unmatched brackets are a user error, not a crash
        print("Invalid program: %s" % (e,))
        sys.exit(1)

    if timer:
        compile_time = time()

    bf.run_bytecode(bytecode, use_stdout=True)

    if timer:
        # Blank line to separate the timings from the program's own output
        print("")
        print("Elapsed time: %0.2f Compile time: %0.2f" % (time() - start_time, compile_time - start_time))
Now, proof that all my work paid off…
matthewd@wintermute:~/brainfucked$ time echo 50 | python brainfucked_old.py prime.bf
Primes up to: 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47
real 0m13.706s
user 0m13.653s
sys 0m0.028s
matthewd@wintermute:~/brainfucked$ time echo 50 | python brainfucked.py prime.bf
Primes up to: 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47
real 0m16.305s
user 0m16.293s
sys 0m0.000s
I can assure you, that’s not what I expected either.
‘Optimising’ this program has actually taught me a valuable lesson: ‘Never assume you know what needs fixing or speeding up’. I added more complexity to this program trying to improve its speed without once profiling or even adding a single extra time() statement. I blindly assumed I knew what the problem was, and dived straight into fixing it without another thought.
Still, it will make it easier to implement an Ook!, Whitespace, or other Brainfuck derivative now :-)