"""Parse treepkg buildreports""" import HTMLParser class ReportParser(HTMLParser.HTMLParser): def __init__(self): HTMLParser.HTMLParser.__init__(self) self.body_start_end = [(0,0),(0,0)] self.style_start_end = [(0,0),(0,0)] self.title_start_end = [(0,0), (0,0)] self.raw_data = '' def handle_starttag(self, tag, attr): # copy raw data if self.raw_data == '': self.raw_data = self.rawdata if tag == 'body': self.body_start_end[0] = self.getpos() if tag == 'style': self.style_start_end[0] = self.getpos() if tag == 'title': self.title_start_end[0] = self.getpos() def handle_endtag(self, tag): if tag == 'body': self.body_start_end[1] = self.getpos() if tag == 'style': self.style_start_end[1] = self.getpos() if tag == 'title': self.title_start_end[1] = self.getpos() def x_seek(self, start, end): # Record raw content from - to line if(start[0] > end[0]): raise 'Invalid range %s to %s' % (start, end) rawdata = self.raw_data rawdata = rawdata.splitlines(1) line_i = 0 rec = 0 content = '' for line in rawdata: if line_i == start[0] and start[0] != end[0]: rec = 1 if line_i == (end[0]-1) and start[0] != end[0]: content += line[:end[1]] break if line_i == (start[0]-1) and start[0] == end[0]: content = line[start[1]:end[1]] break if rec == 1: content += line line_i += 1 return content def get_body_content(self): return self.x_seek(self.body_start_end[0], self.body_start_end[1]) def get_style_content(self): return self.x_seek(self.style_start_end[0], self.style_start_end[1]) def get_title(self): return self.x_seek(self.title_start_end[0], self.title_start_end[1]).replace('