# plugins/Quotes/docs/Utility/DukasCopy.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48

from html.parser import HTMLParser
import sys

class MyHTMLParser(HTMLParser):
    """Convert an HTML listing into ``<quote>`` XML fragments.

    Constructing the parser performs the whole conversion: the file at
    ``in_fn`` is read and parsed, and for every ``<input>`` tag a ``<quote>``
    element is written to ``out_fn``.  The ``stname`` attribute becomes
    ``<symbol>``, the ``stid`` attribute becomes ``<id>``, and the most
    recent text that directly followed a start tag becomes ``<description>``.
    The element is closed at the next ``</tr>``.

    NOTE(review): attribute values and text are written verbatim, without
    XML escaping, exactly as they come out of the HTML parser.
    """

    def __init__(self, in_fn, out_fn):
        """Parse ``in_fn`` and write the generated fragments to ``out_fn``.

        Args:
            in_fn: path of the HTML file to read.
            out_fn: path of the file to (over)write with the result.

        Raises:
            OSError: if either file cannot be opened.
        """
        HTMLParser.__init__(self)
        self.quote = 0    # 1 while a <quote> element is open in the output
        self.descr = ''   # candidate description text for the open quote
        # Must exist before parsing: text preceding the first tag reaches
        # handle_data() before handle_starttag() has ever run.
        self.start = 0    # 1 when the last tag event was a start tag

        with open(in_fn, 'r') as f_in:
            markup = f_in.read()

        # The context manager guarantees the output is closed even when
        # parsing raises part-way through.
        with open(out_fn, 'w') as f_out:
            self.f_out = f_out
            self.feed(markup)
            # Flush whatever the parser still buffers while the output file
            # is open; a later close() by the caller then has nothing left
            # to process.
            self.close()

    def handle_starttag(self, tag, attrs):
        """Open a ``<quote>`` element for every ``<input>`` tag."""
        self.start = 1
        if tag != 'input':
            return
        self.f_out.write('\n<quote>')
        for name, value in attrs:
            if name == 'stname':
                self.f_out.write('\n\t<symbol>%s</symbol>' % value)
            elif name == 'stid':
                self.f_out.write('\n\t<id>%s</id>' % value)
        self.quote = 1

    def handle_endtag(self, tag):
        """Close the open ``<quote>`` element when its table row ends."""
        self.start = 0
        if tag == 'tr' and self.quote == 1:
            if self.descr:
                self.f_out.write(
                    '\n\t<description>%s</description>' % self.descr)
            self.f_out.write('\n</quote>')
            self.quote = 0
            # Reset to empty (not a truthy placeholder) so a following row
            # without its own text does not emit a bogus description.
            self.descr = ''

    def handle_data(self, data):
        """Remember text that immediately follows a start tag."""
        if self.start == 1:
            self.descr = data


# Run the conversion only when executed as a script, so importing this module
# does not read sys.argv or touch any files.
if __name__ == '__main__':
    if len(sys.argv) < 3:
        # Explain the expected arguments instead of failing with IndexError.
        sys.exit('usage: %s <input.html> <output.xml>' % sys.argv[0])
    # argv[1] is the HTML file to read, argv[2] the file to write.
    parser = MyHTMLParser(sys.argv[1], sys.argv[2])
    parser.close()