From ac39e3e3707a898402033588e822d01c8e6be9f9 Mon Sep 17 00:00:00 2001 From: Leandro Lucarella Date: Sat, 14 Jul 2012 20:09:51 +0200 Subject: [PATCH] Ignore br tags for tag depth calculations SubDivX uses old, unclosed br tags, so they break the tag depth calculation. --- subdivxget | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/subdivxget b/subdivxget index 395eb35..d7c32d1 100755 --- a/subdivxget +++ b/subdivxget @@ -67,7 +67,8 @@ class SubDivXHTMLParser(HTMLParser.HTMLParser): self.attr = 'titulo' elif attrs.get('href', '').startswith(self.down_uri): self.cur['url'] = attrs['href'] - if self.parsing: + # br are usually not closed, so ignore them in depth calculation + if self.parsing and tag != 'br': self.depth += 1 def handle_endtag(self, tag): @@ -75,7 +76,9 @@ class SubDivXHTMLParser(HTMLParser.HTMLParser): if tag == 'script' or tag == 'style': self.in_script_style = False return - self.depth -= 1 + # see comment in handle_starttag() + if tag != 'br': + self.depth -= 1 if self.depth == 0: self.parsing = False -- 2.43.0