WoW Combat Log Splitter
Something quick I whipped up last night, after noticing that once my log file grew beyond 4 GB the WorldOfLogs parser would no longer do real-time logging.
Edit: It turns out that the WoW client itself had stopped logging, even though the log file was a little over the expected limit (4,334,806,196 bytes).
Note: The code is just a one-off script; things are hard-coded, and it’s pretty slow (about 100 MB a minute).
#!/usr/bin/env python
import re
import datetime
# Threshold, in seconds (one hour), that the time between two consecutive log
# lines is compared against when deciding whether to start a new output file.
GAP_SIZE_IN_SECONDS = 60 * 60
class CombatLog:
    """Split one large WoW combat log into separate files at long pauses.

    The input is read line by line. Every line must begin with a
    ``m/d hh:mm:ss.msec `` timestamp; whenever two consecutive parsable lines
    are more than GAP_SIZE_IN_SECONDS apart, the current output file is closed
    and a new one (named after the first timestamp it contains) is started.
    """

    # m/d hh:mm:ss.msec
    # 6/6 21:04:29.435
    # Compiled once because it is applied to every line of a multi-GB file.
    # The dot before the milliseconds is escaped so it only matches a real ".".
    _TIMESTAMP_RE = re.compile(r"^(\d+)/(\d+) (\d+):(\d+):(\d+)\.(\d+) ")

    def __init__(self, filename, year=2009):
        """Remember which log to split.

        filename -- path of the combat log to read.
        year     -- year assumed for every entry, because the log lines carry
                    no year of their own. Defaults to 2009.
        """
        self.filename = filename
        self.year = year

    def process(self):
        """Read the log and copy each line into the appropriate split file.

        Lines without a parsable timestamp are reported on stdout and skipped.
        Progress is reported every 100000 lines. The input file and the last
        output file are closed even if processing is interrupted by an error.
        """
        last_timestamp = None
        line_count = 0
        split_log = None
        with open(self.filename) as source:
            try:
                for line in source:
                    line_count += 1
                    timestamp = self.parse_timestamp(line)
                    if timestamp is None:
                        print("Unparsable data on line %d" % (line_count,))
                        print(repr(line))
                        print("")
                        continue

                    if split_log is None:
                        # To handle the first parsable line
                        split_log = self._start_new_file(timestamp)
                    else:
                        difference = timestamp - last_timestamp
                        # timedelta.seconds alone drops whole days, so a pause
                        # of "1 day + 10 minutes" would look like 10 minutes.
                        # Only forward gaps count; a clock that jumps backwards
                        # does not start a new file.
                        elapsed = difference.days * 86400 + difference.seconds
                        if elapsed > GAP_SIZE_IN_SECONDS:
                            # Close the old log file, and start a new one
                            split_log.close()
                            split_log = self._start_new_file(timestamp)

                    split_log.append(line)
                    last_timestamp = timestamp
                    if line_count % 100000 == 0:
                        print("Processed %d lines" % (line_count,))
            finally:
                # Make sure the data written to the final split file is flushed.
                if split_log is not None:
                    split_log.close()

    def _start_new_file(self, timestamp):
        """Open the output file for entries starting at timestamp and announce it."""
        split_log = Appender(timestamp)
        print("Starting new file %s" % (split_log.filename,))
        return split_log

    def parse_timestamp(self, line):
        """Return the datetime at the start of line, or None if there is none.

        None is also returned when the text looks like a timestamp but its
        fields do not form a valid date/time (e.g. month 13), so that such
        lines are reported as unparsable instead of aborting the whole run.
        """
        matches = self._TIMESTAMP_RE.search(line)
        if matches is None:
            return None
        month, day, hour, minute, second, msec = [
            int(field) for field in matches.groups()
        ]
        try:
            # The log stores milliseconds; datetime wants microseconds.
            return datetime.datetime(self.year, month, day,
                                     hour, minute, second, msec * 1000)
        except ValueError:
            return None
class Appender:
    """An output file for one slice of the combat log.

    The file name is derived from the timestamp passed to the constructor and
    the file is opened in append mode, so an existing file with the same name
    is extended rather than truncated.
    """

    def __init__(self, timestamp):
        """Open WoWCombatLog_<YYYYMMDD_HHMMSS>.txt for appending."""
        stamp = timestamp.strftime("%Y%m%d_%H%M%S")
        name = "WoWCombatLog_" + stamp + ".txt"
        self.filename = name
        self.handle = open(name, 'a')

    def append(self, line):
        """Write line to the file exactly as given."""
        out = self.handle
        out.write(line)

    def close(self):
        """Close the underlying file handle."""
        out = self.handle
        out.close()
# Script body: split the hard-coded combat log and report when finished.
source_log = "../WoWCombatLog.20090904.txt"
splitter = CombatLog(source_log)
splitter.process()
print("Done")
Comment from Cryoclasm on August 9th 2011
Here’s an improved version of the timestamp function that eliminates the hard-coded year 2009. It should work properly as long as the clock hasn’t been set back since the start of the last log (may be an issue for speed-kill runs when DST ends) and the oldest log entry is less than a year old.
def parse_timestamp(self, line):
    """Return the datetime at the start of line, inferring the year.

    Written as a replacement for CombatLog.parse_timestamp that does not
    hard-code the year. Log lines carry no year, so the current year is tried
    first; if that would put the entry in the future (or the date does not
    exist in that year, e.g. 2/29), the previous year is used instead.

    Returns None when the line has no leading ``m/d hh:mm:ss.msec `` timestamp
    or when its fields do not form a valid date in either year.
    """
    # m/d hh:mm:ss.msec
    # 6/6 21:04:29.435
    regex = r"^(\d+)/(\d+) (\d+):(\d+):(\d+)\.(\d+) "
    matches = re.search(regex, line)
    if matches is None:
        return None
    month, day, hour, minute, second, msec = [
        int(field) for field in matches.groups()
    ]
    now = datetime.datetime.now()
    # datetime objects are immutable, so the year cannot be decremented in
    # place; build the candidate for each year instead.
    for year in (now.year, now.year - 1):
        try:
            # The log stores milliseconds; datetime wants microseconds.
            timestamp = datetime.datetime(year, month, day,
                                          hour, minute, second, msec * 1000)
        except ValueError:
            continue
        if timestamp <= now:
            return timestamp
        # Otherwise this log entry is from last year; try the previous year.
    return None