X-Git-Url: https://git.llucax.com/software/subdivxget.git/blobdiff_plain/44c70b16fa26aad38244f816c2b0ad3c3a170ecc..4907f9c39086861e391bcfbb740e38e23b3fbea2:/subdivxget?ds=sidebyside diff --git a/subdivxget b/subdivxget index 1aecc66..b5d01ff 100755 --- a/subdivxget +++ b/subdivxget @@ -43,6 +43,7 @@ class SubDivXHTMLParser(HTMLParser.HTMLParser): self.parsing = False self.subs = [] self.attr = None + self.in_script_style = False def handle_starttag(self, tag, attrs): attrs = dict(attrs) @@ -52,6 +53,9 @@ class SubDivXHTMLParser(HTMLParser.HTMLParser): self.parsing = True if not self.parsing: return + if tag == 'script' or tag == 'style': + self.in_script_style = True + return if tag == 'div': if attrs.get('id') == 'buscador_detalle': self.parsing = True @@ -67,6 +71,9 @@ class SubDivXHTMLParser(HTMLParser.HTMLParser): def handle_endtag(self, tag): if self.parsing: + if tag == 'script' or tag == 'style': + self.in_script_style = False + return self.depth -= 1 if self.depth == 0: self.parsing = False @@ -75,6 +82,10 @@ class SubDivXHTMLParser(HTMLParser.HTMLParser): if not self.parsing: return data = data.strip() + # Hack to handle comments in