From: Leandro Lucarella
Date: Sat, 14 Jul 2012 18:09:51 +0000 (+0200)
Subject: Ignore br tags for tag depth calculations
X-Git-Url: https://git.llucax.com/software/subdivxget.git/commitdiff_plain/ac39e3e3707a898402033588e822d01c8e6be9f9?ds=sidebyside

Ignore br tags for tag depth calculations

SubDivX uses old, unclosed br tags, so they break the tag depth
calculation.
---

diff --git a/subdivxget b/subdivxget
index 395eb35..d7c32d1 100755
--- a/subdivxget
+++ b/subdivxget
@@ -67,7 +67,8 @@ class SubDivXHTMLParser(HTMLParser.HTMLParser):
                 self.attr = 'titulo'
             elif attrs.get('href', '').startswith(self.down_uri):
                 self.cur['url'] = attrs['href']
-        if self.parsing:
+        # br are usually not closed, so ignore them in depth calculation
+        if self.parsing and tag != 'br':
             self.depth += 1
 
     def handle_endtag(self, tag):
@@ -75,7 +76,9 @@ class SubDivXHTMLParser(HTMLParser.HTMLParser):
             if tag == 'script' or tag == 'style':
                 self.in_script_style = False
                 return
-            self.depth -= 1
+            # see comment in handle_starttag()
+            if tag != 'br':
+                self.depth -= 1
         if self.depth == 0:
             self.parsing = False