]> git.llucax.com Git - software/subdivxget.git/blob - subdivxget
Initial commit
[software/subdivxget.git] / subdivxget
1 #!/usr/bin/env python
2
3 import sys
4 import urllib
5 import zipfile
6 import subprocess
7 import HTMLParser
8
class SubDivXQuery:
    """Describe one subtitle search request against www.subdivx.com.

    The constructor stores the site host, the search and download page
    paths, and the dict of GET parameters for the search page.  The
    properties derive the URLs the rest of the script uses from those
    attributes.
    """

    def __init__(self, to_search):
        """Prepare a search for the text *to_search*."""
        self.host = "www.subdivx.com"
        self.page = "/index.php"
        self.down_page = "/bajar.php"
        # ``buscar`` carries the searched text; the remaining parameters are
        # fixed values sent along with every search.
        # NOTE(review): their meaning is defined by the site, not by this
        # file -- confirm before changing any of them.
        self.query = dict(
            buscar = to_search,
            accion = 5,
            masdesc = '',
            subtitulos = 1,
            realiza_b = 1,
        )

    @property
    def url(self):
        """Absolute URL of the search: scheme, host, page and query string."""
        encoded = urllib.urlencode(self.query)
        return 'http://' + self.host + self.page + '?' + encoded

    @property
    def page_uri(self):
        """Search page path followed by the encoded query string (no host)."""
        return '%s?%s' % (self.page, urllib.urlencode(self.query))

    @property
    def down_uri(self):
        """Absolute URL of the download page, without any query string."""
        return 'http://%s%s' % (self.host, self.down_page)
31
class Subtitle:
    """Empty container meant to hold the data of one subtitle entry.

    It defines no attributes or methods of its own.  In the code visible
    here the HTML parser stores each entry in a plain dict instead, so this
    class is currently only a placeholder.
    """
34
class SubDivXHTMLParser(HTMLParser.HTMLParser):
    """Collect subtitle entries from a subdivx search results page.

    A result is recognised as a ``<div id="menu_detalle_buscador">`` (which
    holds the title link) optionally followed by a
    ``<div id="buscador_detalle">`` (description, statistics and download
    link).  For every result one dict is appended to ``self.subs``; it holds
    whichever of these keys could be found:

        title, desc, downloads, cds, comentarios, formato, autor, fecha, url

    All values are the stripped text found on the page (strings).

    NOTE: a value is taken from the first non-empty text chunk delivered to
    ``handle_data`` after its label, so text that the underlying parser
    splits into several chunks is only stored up to the first split.
    """

    # State constants; no method of this class references them.  Kept so
    # that existing users of the class attributes keep working.
    IDLE = 1
    HEADER = 2

    # Elements that never get an end tag.  They must not take part in the
    # nesting count, otherwise the count never returns to zero and the
    # parser would keep collecting data past the end of a result.
    VOID_TAGS = frozenset((
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    ))

    def __init__(self, down_uri):
        """Create a parser.

        down_uri -- URL prefix of download links; the ``href`` of any link
                    starting with it is stored as the entry's ``url``.
        """
        HTMLParser.HTMLParser.__init__(self)
        self.down_uri = down_uri
        self.depth = 0        # open (non-void) tags inside the current region
        self.parsing = False  # True while inside a result region
        self.subs = []        # one dict per result found so far
        self.cur = None       # dict of the result being filled, if any
        self.attr = None      # key the next non-empty text will be stored in

    def handle_starttag(self, tag, attrs):
        """Open result regions and note which value the next text holds."""
        attrs = dict(attrs)
        if tag == 'div':
            div_id = attrs.get('id')
            if div_id == 'menu_detalle_buscador':
                # A new result starts here: begin a fresh entry and forget
                # any state left over from a previous, unterminated region.
                self.cur = dict()
                self.subs.append(self.cur)
                self.parsing = True
                self.depth = 0
                self.attr = None
            elif div_id == 'buscador_detalle' and self.cur is not None:
                # The detail block is a separate region that belongs to the
                # entry opened by the preceding header block, so it has to
                # be able to (re)start parsing on its own.
                self.parsing = True
        if not self.parsing:
            return
        if tag == 'div':
            if attrs.get('id') == 'buscador_detalle_sub':
                self.attr = 'desc'
        elif tag == 'a':
            # A valueless attribute is reported as None, hence the ``or``.
            href = attrs.get('href') or ''
            if attrs.get('class') == 'titulo_menu_izq':
                self.attr = 'title'
            elif href.startswith(self.down_uri):
                self.cur['url'] = href
        if tag not in self.VOID_TAGS:
            self.depth += 1

    def handle_endtag(self, tag):
        """Track nesting; leave the region when its outermost tag closes."""
        if not self.parsing or tag in self.VOID_TAGS:
            return
        self.depth -= 1
        if self.depth <= 0:
            self.depth = 0
            self.parsing = False

    def handle_data(self, data):
        """Store a pending value, or recognise the label of the next one."""
        if not self.parsing:
            return
        data = data.strip()
        if self.attr is not None and data:
            self.cur[self.attr] = data
            self.attr = None
        elif data in ('Downloads:', 'Cds:', 'Comentarios:', 'Formato:'):
            # The key is the label without its colon, in lower case.
            self.attr = data[:-1].lower()
        elif data == 'Subido por:':
            self.attr = 'autor'
        elif data == 'el':
            self.attr = 'fecha'
91
92
# Content types announcing each supported archive format.
_ZIP_TYPES = ('application/zip',)
_RAR_TYPES = ('application/rar', 'application/x-rar-compressed')

# Fields shown in the listing; any of them may be missing from a parsed entry.
_LISTING_FIELDS = ('title', 'autor', 'fecha', 'downloads', 'comentarios',
                   'desc')
_LISTING = (
    "        - %(title)s (%(autor)s - %(fecha)s - %(downloads)s"
    " - %(comentarios)s)\n"
    "          %(desc)s\n"
    "                DOWNLOADING ...\n"
    "        "
)


def _download_count(sub):
    """Return the download counter of *sub* as an int, 0 if absent."""
    # Keep the digits only so values with thousands separators still sort.
    digits = ''.join(ch for ch in sub.get('downloads', '') if ch.isdigit())
    return int(digits) if digits else 0


def _extract_zip(fname):
    """List the zip archive *fname* and extract its .srt/.sub members."""
    try:
        archive = zipfile.ZipFile(fname, 'r')
    except zipfile.BadZipfile:
        print('Error opening zip file %s' % fname)
        return
    try:
        archive.printdir()
        for fn in archive.namelist():
            if not fn.endswith(('.srt', '.sub')):
                continue
            # Refuse member names that could be written outside the
            # extraction directory.
            if '..' in fn or fn.startswith('/'):
                print('Dangerous file name: %s' % fn)
                continue
            print('Extracting %s ...' % fn)
            archive.extract(fn)
    finally:
        archive.close()


def _extract_rar(fname):
    """Extract the rar archive *fname* with the external ``rar`` program."""
    try:
        status = subprocess.call(['rar', 'x', fname])
    except OSError:
        # The ``rar`` executable is missing or cannot be run.
        status = -1
    if status != 0:
        print('Error unraring file %s' % fname)


def _fetch_and_unpack(url):
    """Download *url* and unpack it according to its Content-Type header."""
    fname, headers = urllib.urlretrieve(url)
    if 'Content-Type' not in headers:
        print('No Content-Type!')
        return
    raw_type = headers['Content-Type']
    # Compare the bare media type: ignore parameters such as "; charset=..."
    # and differences in case.
    media_type = raw_type.split(';')[0].strip().lower()
    if media_type in _ZIP_TYPES:
        _extract_zip(fname)
    elif media_type in _RAR_TYPES:
        _extract_rar(fname)
    else:
        print('Unrecognized file type: %s' % raw_type)


def get_subs(query_str):
    """Search for *query_str* and download every subtitle that is found.

    The results page is fetched and parsed, then the entries are handled
    from the most to the least downloaded one: a short description is
    printed, the archive behind the entry's download link is retrieved and
    unpacked (zip archives through ``zipfile``, extracting only ``.srt`` and
    ``.sub`` members; rar archives through the external ``rar`` program).

    Entries without a download link are listed and skipped; fields missing
    from an entry are shown as ``?``.  Returns None.
    """
    query = SubDivXQuery(query_str)
    parser = SubDivXHTMLParser(query.down_uri)

    page = urllib.urlopen(query.url)
    try:
        for line in page:
            parser.feed(line)
    finally:
        page.close()

    for sub in sorted(parser.subs, key=_download_count, reverse=True):
        shown = dict.fromkeys(_LISTING_FIELDS, '?')
        shown.update(sub)
        print(_LISTING % shown)
        if 'url' not in sub:
            print('No download link found, skipping')
            continue
        try:
            _fetch_and_unpack(sub['url'])
        finally:
            # Remove the temporary file urlretrieve() created.
            urllib.urlcleanup()
133
134
# Script entry: each command-line argument is run as an independent search,
# in the order given.
cli_queries = sys.argv[1:]
for q in cli_queries:
    get_subs(q)
137