X-Git-Url: https://git.llucax.com/software/subdivxget.git/blobdiff_plain/aa52c87e353921174d3b3874c2a261f175970a8c..44c70b16fa26aad38244f816c2b0ad3c3a170ecc:/subdivxget diff --git a/subdivxget b/subdivxget index 5c5cc7a..1aecc66 100755 --- a/subdivxget +++ b/subdivxget @@ -7,12 +7,13 @@ import subprocess import HTMLParser class SubDivXQuery: - def __init__(self, to_search): + def __init__(self, to_search, page_number): self.host = "www.subdivx.com" self.page = "/index.php" self.down_page = "/bajar.php" self.query = dict( buscar = to_search, + pg = page_number, accion = 5, masdesc = '', subtitulos = 1, @@ -29,8 +30,6 @@ class SubDivXQuery: def down_uri(self): return 'http://' + self.host + self.down_page -class Subtitle: - pass class SubDivXHTMLParser(HTMLParser.HTMLParser): @@ -48,7 +47,6 @@ class SubDivXHTMLParser(HTMLParser.HTMLParser): def handle_starttag(self, tag, attrs): attrs = dict(attrs) if tag == 'div' and attrs.get('id') == 'menu_detalle_buscador': - #self.cur = Subtitle() self.cur = dict() self.subs.append(self.cur) self.parsing = True @@ -74,38 +72,48 @@ class SubDivXHTMLParser(HTMLParser.HTMLParser): self.parsing = False def handle_data(self, data): - if self.parsing: - data = data.strip() - if self.attr is not None and data: - self.cur[self.attr] = data - self.attr = None - #self.cur[self.attr] = self.cur.get(self.attr, '') + data.strip() - #setattr(self.cur, self.attr, data.strip()) - elif data in ('Downloads:', 'Cds:', 'Comentarios:', - 'Formato:'): - self.attr = data[:-1].lower() - elif data == 'Subido por:': - self.attr = 'autor' - elif data == 'el': - self.attr = 'fecha' + if not self.parsing: + return + data = data.strip() + if self.attr is not None and data: + self.cur[self.attr] = data + self.attr = None + elif data in ('Downloads:', 'Cds:', 'Comentarios:', + 'Formato:'): + self.attr = data[:-1].lower() + elif data == 'Subido por:': + self.attr = 'autor' + elif data == 'el': + self.attr = 'fecha' -def get_subs(query_str): - query = SubDivXQuery(query_str) +def subdivx_get_subs(query_str): + page_number = 1 + subs = [] + while True: + query = SubDivXQuery(query_str, page_number) + url = urllib.urlopen(query.url) + parser = SubDivXHTMLParser(query.down_uri) - url = urllib.urlopen(query.url) + for line in url: + parser.feed(line) - parser = SubDivXHTMLParser(query.down_uri) + url.close() - for line in url: - parser.feed(line) + if not parser.subs: + break - url.close() + subs.extend(parser.subs) + page_number += 1 + return sorted(subs, key=lambda s: int(s['downloads']), reverse=True) + + +def get_subs(query_str): zip_exts = ('application/zip',) rar_exts = ('application/rar', 'application/x-rar-compressed') - for sub in sorted(parser.subs, key=lambda s: int(s['downloads']), reverse=True): + for sub in subdivx_get_subs(query_str): print '''\ - %(title)s (%(autor)s - %(fecha)s - %(downloads)s - %(comentarios)s) %(desc)s