# Constructor of the query object (the enclosing class header is outside this
# excerpt).  The visible lines record the site host name, the path that the
# query string is later appended to, and the path used for the download URI.
# NOTE(review): the original numbering jumps from 13 to 24, so the code that
# consumes `to_search` and `page_number` (presumably to build self.query) is
# not shown here -- confirm against the full file.
10 def __init__(self, to_search, page_number):
11 self.host = "www.subdivx.com"
12 self.page = "/index.php"
13 self.down_page = "/bajar.php"
24 return 'http://%s%s?%s' % (self.host, self.page,
25 urllib.urlencode(self.query))
28 return self.page + '?' + urllib.urlencode(self.query)
31 return 'http://' + self.host + self.down_page
# HTML parser that collects subtitle entries from the HTML fed to it.  The
# entry being filled in is kept as a mapping in self.cur and appended to
# self.subs; parsed pieces are stored under keys such as 'url' and the
# lower-cased labels handled in handle_data().
# NOTE(review): this excerpt omits several original lines (the embedded
# numbering has gaps), so the initialisation of subs/cur/attr/parsing and the
# bodies of some branches are not visible here.
34 class SubDivXHTMLParser(HTMLParser.HTMLParser):
# down_uri: prefix that identifies download links; handle_starttag() stores
# any href starting with it as the current entry's 'url'.
39 def __init__(self, down_uri):
40 HTMLParser.HTMLParser.__init__(self)
41 self.down_uri = down_uri
# Set while inside <script>/<style>; handle_data() checks it.
47 self.in_script_style = False
# Called by HTMLParser for every opening tag.  Dispatches on the tag name and
# on its id/class/href attributes.
# NOTE(review): attrs is used as a mapping (.get / ['href']) although
# HTMLParser passes a list of (name, value) pairs; the conversion is
# presumably on a line missing from this excerpt -- confirm.
49 def handle_starttag(self, tag, attrs):
# On this div the current entry is appended to self.subs (any guard on the
# hidden line in between is not visible here).
51 if tag == 'div' and attrs.get('id') == 'menu_detalle_buscador':
53 self.subs.append(self.cur)
57 if tag == 'script' or tag == 'style':
58 self.in_script_style = True
61 if attrs.get('id') == 'buscador_detalle':
63 elif attrs.get('id') == 'buscador_detalle_sub':
66 if attrs.get('class') == 'titulo_menu_izq':
68 elif attrs.get('href', '').startswith(self.down_uri):
69 self.cur['url'] = attrs['href']
70 # br are usually not closed, so ignore them in depth calculation
71 if self.parsing and tag != 'br':
# Called for every closing tag; leaving <script>/<style> clears the
# in_script_style flag.
74 def handle_endtag(self, tag):
76 if tag == 'script' or tag == 'style':
77 self.in_script_style = False
79 # see comment in handle_starttag()
# Called for text nodes.  When self.attr names a pending field and the text
# is non-empty, the text is stored in self.cur under that key.  Label texts
# such as 'Downloads:' select the next field name by dropping the trailing
# ':' and lower-casing it (e.g. 'Downloads:' -> 'downloads').
85 def handle_data(self, data):
89 # Hack to handle comments in <script> <style> which don't end
90 # up in handle_comment(), so we just ignore the whole tags
91 if self.in_script_style:
93 if self.attr is not None and data:
94 self.cur[self.attr] = data
96 elif data in ('Downloads:', 'Cds:', 'Comentarios:',
98 self.attr = data[:-1].lower()
99 elif data == 'Subido por:':
# Build a SubDivXQuery for query_str, open its URL, create a parser keyed on
# the query's download URI, accumulate parser.subs into `subs`, and return
# the entries sorted by their 'downloads' value, highest first.
# NOTE(review): `page_number`, `subs` and the step that feeds the response to
# the parser are on lines missing from this excerpt (the numbering suggests a
# paging loop around 109-121) -- confirm against the full file.
105 def subdivx_get_subs(query_str):
109 query = SubDivXQuery(query_str, page_number)
110 url = urllib.urlopen(query.url)
111 parser = SubDivXHTMLParser(query.down_uri)
121 subs.extend(parser.subs)
# NOTE(review): int() raises ValueError if 'downloads' is not a plain integer
# string (e.g. it contains a thousands separator) and the lookup raises
# KeyError if the key is absent -- verify what the parser actually stores.
124 return sorted(subs, key=lambda s: int(s['downloads']), reverse=True)
# For each entry returned by subdivx_get_subs(query_str): download sub['url']
# with urllib.urlretrieve (which saves to a temporary file when no filename
# is given) and unpack it according to the response Content-Type -- zip
# archives via zipfile, rar archives via the external `rar` command.
# Problems are reported with print; nothing is returned on the visible lines.
127 def get_subs(query_str):
# Despite the names, these tuples hold Content-Type (MIME) values, not file
# extensions.
128 zip_exts = ('application/zip',)
129 rar_exts = ('application/rar', 'application/x-rar-compressed')
131 for sub in subdivx_get_subs(query_str):
133 - %(titulo)s (%(autor)s - %(fecha)s - %(downloads)s - %(comentarios)s)
137 fname, headers = urllib.urlretrieve(sub['url'])
138 if 'Content-Type' in headers:
# NOTE(review): exact-match comparison; a header value carrying parameters
# (e.g. '; charset=...') would match neither tuple -- confirm whether the
# server ever sends them.
139 if headers['Content-Type'] in zip_exts:
140 z = zipfile.ZipFile(fname, 'r')
142 for fn in z.namelist():
143 if fn.endswith('.srt') or fn.endswith('.sub'):
# Guard against archive members whose name contains a parent-directory
# reference or is an absolute path.
144 if '..' in fn or fn.startswith('/'):
145 print 'Dangerous file name:', fn
147 print 'Extracting', fn, '...'
149 elif headers['Content-Type'] in rar_exts:
# A non-zero exit status from `rar x` is reported as an error.
150 if subprocess.call(['rar', 'x', fname]) != 0:
151 print 'Error unraring file %s' % fname
153 print 'Unrecognized file type:', headers['Content-Type']
155 print 'No Content-Type!'
158 for q in sys.argv[1:]: