X-Git-Url: https://git.llucax.com/software/subdivxget.git/blobdiff_plain/a53f6ed5e6cbd150f0fd27dbaae850f57eff33fe..f274666ba6db3400c33faf64aa40c3d2ba2d5156:/subdivxget?ds=sidebyside diff --git a/subdivxget b/subdivxget index aaa3d44..bd6fc6e 100755 --- a/subdivxget +++ b/subdivxget @@ -7,12 +7,13 @@ import subprocess import HTMLParser class SubDivXQuery: - def __init__(self, to_search): + def __init__(self, to_search, page_number): self.host = "www.subdivx.com" self.page = "/index.php" self.down_page = "/bajar.php" self.query = dict( buscar = to_search, + pg = page_number, accion = 5, masdesc = '', subtitulos = 1, @@ -42,6 +43,8 @@ class SubDivXHTMLParser(HTMLParser.HTMLParser): self.parsing = False self.subs = [] self.attr = None + self.cur = None + self.in_script_style = False def handle_starttag(self, tag, attrs): attrs = dict(attrs) @@ -51,6 +54,9 @@ class SubDivXHTMLParser(HTMLParser.HTMLParser): self.parsing = True if not self.parsing: return + if tag == 'script' or tag == 'style': + self.in_script_style = True + return if tag == 'div': if attrs.get('id') == 'buscador_detalle': self.parsing = True @@ -66,41 +72,60 @@ class SubDivXHTMLParser(HTMLParser.HTMLParser): def handle_endtag(self, tag): if self.parsing: + if tag == 'script' or tag == 'style': + self.in_script_style = False + return self.depth -= 1 if self.depth == 0: self.parsing = False def handle_data(self, data): - if self.parsing: - data = data.strip() - if self.attr is not None and data: - self.cur[self.attr] = data - self.attr = None - elif data in ('Downloads:', 'Cds:', 'Comentarios:', - 'Formato:'): - self.attr = data[:-1].lower() - elif data == 'Subido por:': - self.attr = 'autor' - elif data == 'el': - self.attr = 'fecha' + if not self.parsing: + return + data = data.strip() + # Hack to handle comments in