X-Git-Url: https://git.llucax.com/software/subdivxget.git/blobdiff_plain/fc1741837b1c15d0fed737b7798fe21ec7d33c64..a60a67c9c4510e35c47c79fcd6afd7674e3a1607:/subdivxget

diff --git a/subdivxget b/subdivxget
index 38d46c4..ae79cde 100755
--- a/subdivxget
+++ b/subdivxget
@@ -1,10 +1,20 @@
 #!/usr/bin/env python
 
 import sys
-import urllib
+if sys.version_info[0] < 3:
+    from HTMLParser import HTMLParser
+    from urllib import urlopen, urlretrieve, urlencode
+    def get_encoding(info):
+        return info.getparam('charset')
+
+else:
+    from html.parser import HTMLParser
+    from urllib.request import urlopen, urlretrieve
+    from urllib.parse import urlencode
+    def get_encoding(info):
+        return info.get_content_charset('ascii')
 import zipfile
 import subprocess
-import HTMLParser
 
 class SubDivXQuery:
     def __init__(self, to_search, page_number):
@@ -22,22 +32,22 @@ class SubDivXQuery:
     @property
     def url(self):
         return 'http://%s%s?%s' % (self.host, self.page,
-                urllib.urlencode(self.query))
+                urlencode(self.query))
 
     @property
     def page_uri(self):
-        return self.page + '?' + urllib.urlencode(self.query)
+        return self.page + '?' + urlencode(self.query)
 
     @property
     def down_uri(self):
         return 'http://' + self.host + self.down_page
 
-class SubDivXHTMLParser(HTMLParser.HTMLParser):
+class SubDivXHTMLParser(HTMLParser):
     IDLE = 1
     HEADER = 2
 
     def __init__(self, down_uri):
-        HTMLParser.HTMLParser.__init__(self)
+        HTMLParser.__init__(self)
         self.down_uri = down_uri
         self.depth = 0
         self.parsing = False
@@ -173,11 +183,16 @@ def subdivx_get_subs(query_str):
     subs = []
     while True:
         query = SubDivXQuery(query_str, page_number)
-        url = urllib.urlopen(query.url)
+        url = urlopen(query.url)
         parser = SubDivXHTMLParser(query.down_uri)
 
+        try:
+            encoding = get_encoding(url.info())
+        except:
+            encoding = 'ascii'
+
         for line in url:
-            parser.feed(line)
+            parser.feed(line.decode(encoding))
 
         url.close()
 
@@ -198,12 +213,13 @@ def get_subs(query_str, filters):
     subs = filter_subtitles(subs, filters)
 
     for sub in subs:
-        print '''\
+        print('''\
 - %(titulo)s (%(autor)s - %(fecha)s - %(downloads)s - %(comentarios)s)
   %(desc)s
     DOWNLOADING ...
-''' % sub
-        fname, headers = urllib.urlretrieve(sub['url'])
+''' % sub)
+        continue
+        fname, headers = urlretrieve(sub['url'])
         if 'Content-Type' in headers:
             if headers['Content-Type'] in zip_exts:
                 z = zipfile.ZipFile(fname, 'r')
@@ -211,17 +227,17 @@ def get_subs(query_str, filters):
                 for fn in z.namelist():
                     if fn.endswith('.srt') or fn.endswith('.sub'):
                         if '..' in fn or fn.startswith('/'):
-                            print 'Dangerous file name:', fn
+                            print('Dangerous file name:', fn)
                             continue
-                        print 'Extracting', fn, '...'
+                        print('Extracting', fn, '...')
                         z.extract(fn)
             elif headers['Content-Type'] in rar_exts:
                 if subprocess.call(['rar', 'x', fname]) != 0:
-                    print 'Error unraring file %s' % fname
+                    print('Error unraring file %s' % fname)
             else:
-                print 'Unrecognized file type:', headers['Content-Type']
+                print('Unrecognized file type:', headers['Content-Type'])
         else:
-            print 'No Content-Type!'
+            print('No Content-Type!')
 
 
 get_subs(sys.argv[1], sys.argv[2:])