summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
44c70b1)
<script> and <style> tags can have HTML comments inside of them, and those
comments are not parsed by HTMLParser (they never reach handle_comment()), so
we just ignore these tags entirely to avoid issues; we don't really need their
contents for anything anyway.
self.parsing = False
self.subs = []
self.attr = None
self.parsing = False
self.subs = []
self.attr = None
+ self.in_script_style = False
def handle_starttag(self, tag, attrs):
attrs = dict(attrs)
def handle_starttag(self, tag, attrs):
attrs = dict(attrs)
self.parsing = True
if not self.parsing:
return
self.parsing = True
if not self.parsing:
return
+ if tag == 'script' or tag == 'style':
+ self.in_script_style = True
+ return
if tag == 'div':
if attrs.get('id') == 'buscador_detalle':
self.parsing = True
if tag == 'div':
if attrs.get('id') == 'buscador_detalle':
self.parsing = True
def handle_endtag(self, tag):
if self.parsing:
def handle_endtag(self, tag):
if self.parsing:
+ if tag == 'script' or tag == 'style':
+ self.in_script_style = False
+ return
self.depth -= 1
if self.depth == 0:
self.parsing = False
self.depth -= 1
if self.depth == 0:
self.parsing = False
if not self.parsing:
return
data = data.strip()
if not self.parsing:
return
data = data.strip()
+ # Hack to handle comments in <script> <style> which don't end
+ # up in handle_comment(), so we just ignore the whole tags
+ if self.in_script_style:
+ return
if self.attr is not None and data:
self.cur[self.attr] = data
self.attr = None
if self.attr is not None and data:
self.cur[self.attr] = data
self.attr = None