"""Convert the ``<select name="from">`` option list of an HTML page to XML.

Usage: ``script INPUT_HTML OUTPUT_XML``
"""

from html.parser import HTMLParser
import sys
from xml.etree.ElementTree import Element, ElementTree, SubElement


class MyHTMLParser(HTMLParser):
    """Extract ``<option>`` entries of ``<select name="from">`` into XML.

    Constructing the parser does all the work: it reads ``in_fn``, parses
    it, and writes a ``Provider`` document to ``out_fn`` shaped like::

        <Provider>
          <name>Google</name> <ref>...</ref> <url>...</url>
          <section>
            <name>Currencies</name>
            <quote><id>V</id><symbol>V</symbol><description>T</description></quote>
            ...
          </section>
        </Provider>

    where ``V`` is an option's ``value`` attribute and ``T`` is the text
    that directly follows the option's start tag.

    State attributes (kept as 0/1 ints):
        start: 1 while the most recent tag event was a start tag.
        parse_option: 1 while inside the ``<select name="from">`` element.
        elQuote: element that receives the next ``description``; a detached
            placeholder whenever no real ``quote`` is current.
        root: the ``section`` element that collects the ``quote`` children.
    """

    def __init__(self, in_fn, out_fn):
        """Parse the HTML file ``in_fn`` and write the XML to ``out_fn``.

        Args:
            in_fn: path of the HTML file to read (opened in text mode with
                the platform default encoding).
            out_fn: path (or writable file object) handed to
                ``ElementTree.write``.

        Raises:
            OSError: if ``in_fn`` cannot be opened or ``out_fn`` written.
        """
        HTMLParser.__init__(self)
        self.quote = 0
        self.start = 0
        self.parse_option = 0
        # Detached placeholder: absorbs text seen before any real <quote>
        # exists (e.g. whitespace right after the <select> tag).
        self.elQuote = Element("fake")

        elProvider = Element("Provider")
        SubElement(elProvider, 'name').text = 'Google'
        SubElement(elProvider, 'ref').text = 'http://www.google.com'
        SubElement(elProvider, 'url').text = 'http://www.google.com/finance/converter?a=1&'
        self.root = SubElement(elProvider, 'section')
        SubElement(self.root, 'name').text = 'Currencies'

        # The context manager closes the input even if parsing raises.
        with open(in_fn, 'r') as f_in:
            self.feed(f_in.read())
        # Flush anything the parser is still buffering *before* the tree is
        # serialized; otherwise trailing input would be handled too late.
        self.close()

        ElementTree(elProvider).write(out_fn)

    def handle_starttag(self, tag, attrs):
        """Track entry into the "from" select and open a quote per option.

        Args:
            tag: lower-cased tag name supplied by ``HTMLParser``.
            attrs: list of ``(name, value)`` attribute pairs.
        """
        self.start = 1
        if tag == 'select':
            if self.parse_option == 0:
                if any(name == 'name' and value == 'from'
                       for name, value in attrs):
                    self.parse_option = 1
            else:
                # A new <select> while still collecting (e.g. the previous
                # one was never closed): stop collecting.  The original
                # used `==` here, which was a no-op comparison.
                self.parse_option = 0
        elif self.parse_option == 1 and tag == 'option':
            # Until a value is found, route this option's text to a
            # placeholder so it is not attached to the previous quote.
            self.elQuote = Element("fake")
            for name, value in attrs:
                if name == 'value':
                    self.elQuote = SubElement(self.root, 'quote')
                    SubElement(self.elQuote, 'id').text = value
                    SubElement(self.elQuote, 'symbol').text = value
                    break

    def handle_endtag(self, tag):
        """Clear the start-tag flag and leave collecting mode on ``</select>``.

        Args:
            tag: lower-cased tag name supplied by ``HTMLParser``.
        """
        self.start = 0
        if tag == 'select':
            # Was `self.parse_option == 0` (a comparison), so options of
            # every later <select> leaked into the output.
            self.parse_option = 0

    def handle_data(self, data):
        """Record text that directly follows a start tag as a description.

        Only acts while inside the "from" select; the text is stored
        verbatim as a ``description`` child of the current ``elQuote``.

        Args:
            data: text content supplied by ``HTMLParser``.
        """
        if self.start == 1 and self.parse_option == 1:
            SubElement(self.elQuote, 'description').text = data


if __name__ == "__main__":
    # Guarded so importing this module neither parses nor touches sys.argv.
    if len(sys.argv) < 3:
        sys.exit("usage: %s INPUT_HTML OUTPUT_XML" % sys.argv[0])
    parser = MyHTMLParser(sys.argv[1], sys.argv[2])
    parser.close()