From c79b74764e72a29ce82d3ec489c2433e8ad6ba20 Mon Sep 17 00:00:00 2001 From: Leandro Lucarella Date: Sat, 14 Jul 2012 20:09:46 +0200 Subject: [PATCH] Get all available pages SubDivX pages the results, so we keep downloading the next page to get all the available results until we can't find any subtitles in a page. --- subdivxget | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/subdivxget b/subdivxget index aaa3d44..5f67edb 100755 --- a/subdivxget +++ b/subdivxget @@ -7,12 +7,13 @@ import subprocess import HTMLParser class SubDivXQuery: - def __init__(self, to_search): + def __init__(self, to_search, page_number): self.host = "www.subdivx.com" self.page = "/index.php" self.down_page = "/bajar.php" self.query = dict( buscar = to_search, + pg = page_number, accion = 5, masdesc = '', subtitulos = 1, @@ -85,22 +86,33 @@ class SubDivXHTMLParser(HTMLParser.HTMLParser): self.attr = 'fecha' -def get_subs(query_str): - query = SubDivXQuery(query_str) +def subdivx_get_subs(query_str): + page_number = 1 + subs = [] + while True: + query = SubDivXQuery(query_str, page_number) + url = urllib.urlopen(query.url) + parser = SubDivXHTMLParser(query.down_uri) + + for line in url: + parser.feed(line) - url = urllib.urlopen(query.url) + url.close() - parser = SubDivXHTMLParser(query.down_uri) + if not parser.subs: + break - for line in url: - parser.feed(line) + subs.extend(parser.subs) + page_number += 1 - url.close() + return sorted(subs, key=lambda s: int(s['downloads']), reverse=True) + +def get_subs(query_str): zip_exts = ('application/zip',) rar_exts = ('application/rar', 'application/x-rar-compressed') - for sub in sorted(parser.subs, key=lambda s: int(s['downloads']), reverse=True): + for sub in subdivx_get_subs(query_str): print '''\ - %(title)s (%(autor)s - %(fecha)s - %(downloads)s - %(comentarios)s) %(desc)s -- 2.43.0