from html.parser import HTMLParser
import sys
class MyHTMLParser(HTMLParser):
    """Extract ``stname``/``stid`` attributes of ``<input>`` tags to a text file.

    The whole conversion happens in the constructor: the input file is read,
    fed through the parser, and the result is written to the output file.
    For every ``<input>`` tag a newline is written, followed by one
    tab-indented line per ``stname`` / ``stid`` attribute value. When the
    enclosing ``</tr>`` is reached, the most recently captured text (if any)
    is written as another tab-indented line, followed by a blank line.

    NOTE(review): the original source had lost its indentation; the nesting
    below was reconstructed from the syntax and should be confirmed.
    """

    def __init__(self, in_fn, out_fn):
        """Parse the HTML file *in_fn* and write the extracted text to *out_fn*.

        Args:
            in_fn: Path of the HTML file to read.
            out_fn: Path of the text file to create (overwritten if present).

        Raises:
            OSError: If either file cannot be opened.
        """
        super().__init__()
        # 1 once an <input> carrying ``stid`` was seen in the current row.
        self.quote = 0
        # Last text chunk seen directly after a start tag.
        self.descr = ''
        # 1 while the most recent tag event was a start tag. Initialised here
        # so that text appearing before the first tag does not raise
        # AttributeError in handle_data().
        self.start = 0

        # Read the input up front; the output file is only created once the
        # input was read successfully.
        with open(in_fn, 'r') as f_in:
            markup = f_in.read()

        # The context manager guarantees the output is closed even if
        # parsing raises. ``self.f_out`` stays available (closed) afterwards.
        with open(out_fn, 'w') as self.f_out:
            self.feed(markup)

    def handle_starttag(self, tag, attrs):
        """Write the ``stname``/``stid`` values of every ``<input>`` tag."""
        self.start = 1
        if tag != 'input':
            return
        self.f_out.write('\n')
        for name, value in attrs:
            if name == 'stname':
                self.f_out.write('\n\t%s' % value)
            if name == 'stid':
                self.f_out.write('\n\t%s' % value)
                # NOTE(review): assumed to be set only when ``stid`` is
                # present; confirm against the intended nesting.
                self.quote = 1

    def handle_endtag(self, tag):
        """On ``</tr>``, finish the record started by an ``<input stid=...>``."""
        self.start = 0
        if tag == 'tr' and self.quote == 1:
            if self.descr:
                self.f_out.write('\n\t%s' % self.descr)
            # The original literal was broken across two physical lines
            # (a syntax error); reconstructed as two newline characters.
            self.f_out.write('\n\n')
            self.quote = 0
            # Reset to the empty string (the original assigned the integer 1,
            # which made the ``if self.descr`` guard above always true and
            # wrote a stray "1" for rows without text).
            self.descr = ''

    def handle_data(self, data):
        """Remember text that immediately follows a start tag."""
        if self.start == 1:
            self.descr = data
# Script entry: argv[1] is the HTML input path, argv[2] the output path.
# Constructing the parser performs the whole conversion; close() then asks
# the base HTMLParser to process anything it still has buffered.
parser = MyHTMLParser(in_fn=sys.argv[1], out_fn=sys.argv[2])
parser.close()