From aa52c87e353921174d3b3874c2a261f175970a8c Mon Sep 17 00:00:00 2001
From: Leandro Lucarella <luca@llucax.com.ar>
Date: Sat, 14 Jul 2012 20:12:51 +0200
Subject: [PATCH 1/1] Initial commit

---
 subdivxget | 137 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 137 insertions(+)
 create mode 100755 subdivxget

diff --git a/subdivxget b/subdivxget
new file mode 100755
index 0000000..5c5cc7a
--- /dev/null
+++ b/subdivxget
@@ -0,0 +1,137 @@
+#!/usr/bin/env python
+
+import sys
+import urllib
+import zipfile
+import subprocess
+import HTMLParser
+
+class SubDivXQuery:
+	"""Search request for subdivx.com; exposes the URLs derived from it."""
+	def __init__(self, to_search):
+		# Host and script paths used to build the URLs below.
+		self.host = "www.subdivx.com"
+		self.page = "/index.php"
+		self.down_page = "/bajar.php"
+		# Form fields sent with the search; only 'buscar' varies per query.
+		self.query = dict(buscar = to_search, accion = 5, masdesc = '',
+				subtitulos = 1, realiza_b = 1)
+	@property
+	def url(self):
+		"""Absolute URL of the search results page."""
+		return 'http://' + self.host + self.page_uri
+	@property
+	def page_uri(self):
+		"""Path plus encoded query string, without scheme or host."""
+		return '%s?%s' % (self.page, urllib.urlencode(self.query))
+	@property
+	def down_uri(self):
+		"""Absolute URL of the download script, without any query string."""
+		return 'http://%s%s' % (self.host, self.down_page)
+
+class Subtitle:
+	"""Placeholder record type; nothing in this file instantiates it yet."""
+
+class SubDivXHTMLParser(HTMLParser.HTMLParser):
+	"""Collect one dict per search result in self.subs while fed HTML."""
+	IDLE, HEADER = 1, 2
+	# Tags that never get an end tag; counting them unbalances self.depth.
+	VOID = ('area', 'base', 'br', 'col', 'hr', 'img', 'input', 'link', 'meta', 'param')
+
+	def __init__(self, down_uri):
+		HTMLParser.HTMLParser.__init__(self)
+		self.down_uri = down_uri  # prefix identifying download links
+		self.depth = 0  # open tags inside the block being parsed
+		self.parsing = False
+		self.subs = []
+		self.cur = None  # dict of the result being filled in
+		self.attr = None  # key the next non-empty text is stored under
+
+	def handle_starttag(self, tag, attrs):
+		"""Track result blocks and arm self.attr for the next text node."""
+		attrs = dict(attrs)
+		div_id = attrs.get('id') if tag == 'div' else None
+		if div_id == 'menu_detalle_buscador':
+			self.cur = dict()
+			self.subs.append(self.cur)
+			self.parsing = True
+		elif div_id == 'buscador_detalle' and self.cur is not None:
+			# Checked before the guard so a detail block can resume parsing.
+			self.parsing = True
+		if not self.parsing or tag in self.VOID:
+			return
+		if div_id == 'buscador_detalle_sub':
+			self.attr = 'desc'
+		elif tag == 'a':
+			if attrs.get('class') == 'titulo_menu_izq':
+				self.attr = 'title'
+			elif (attrs.get('href') or '').startswith(self.down_uri):
+				self.cur['url'] = attrs['href']
+		self.depth += 1
+
+	def handle_endtag(self, tag):
+		"""Leave parsing mode once the block that started it is closed."""
+		if self.parsing and tag not in self.VOID:
+			self.depth -= 1
+			self.parsing = self.depth > 0
+
+	def handle_data(self, data):
+		"""Store text under the armed key, or arm a key from a label."""
+		if self.parsing:
+			data = data.strip()
+			if self.attr is not None and data:
+				self.cur[self.attr] = data
+				self.attr = None
+			elif data in ('Downloads:', 'Cds:', 'Comentarios:', 'Formato:'):
+				self.attr = data[:-1].lower()
+			elif data == 'Subido por:':
+				self.attr = 'autor'
+			elif data == 'el':
+				self.attr = 'fecha'
+
+
+def get_subs(query_str):
+	"""Search subdivx.com for query_str; download and unpack each result."""
+	query = SubDivXQuery(query_str)
+	parser = SubDivXHTMLParser(query.down_uri)
+	url = urllib.urlopen(query.url)
+	for line in url:
+		parser.feed(line)
+	url.close()
+	zip_exts = ('application/zip',)
+	rar_exts = ('application/rar', 'application/x-rar-compressed')
+	keys = ('title', 'autor', 'fecha', 'downloads', 'comentarios', 'desc')
+	def count(sub):
+		# Digits only: a missing or '1,234'-style value must not abort.
+		return int(''.join(c for c in sub.get('downloads', '') if c.isdigit()) or 0)
+	subs = [s for s in parser.subs if 'url' in s]
+	for sub in sorted(subs, key=count, reverse=True):
+		print '''\
+	- %(title)s (%(autor)s - %(fecha)s - %(downloads)s - %(comentarios)s)
+	  %(desc)s
+		DOWNLOADING ...
+	''' % dict(dict.fromkeys(keys, '?'), **sub)
+		fname, headers = urllib.urlretrieve(sub['url'])
+		ctype = headers.get('Content-Type')
+		if ctype in zip_exts:
+			z = zipfile.ZipFile(fname, 'r')
+			z.printdir()
+			for fn in (n for n in z.namelist() if n.endswith(('.srt', '.sub'))):
+				if '..' in fn or fn.startswith('/'):
+					print 'Dangerous file name:', fn
+				else:
+					print 'Extracting', fn, '...'
+					z.extract(fn)
+			z.close()
+		elif ctype in rar_exts:
+			if subprocess.call(['rar', 'x', fname]) != 0:
+				print 'Error unraring file %s' % fname
+		elif ctype is None:
+			print 'No Content-Type!'
+		else:
+			print 'Unrecognized file type:', ctype
+
+
+if __name__ == '__main__':  # run one search per command-line argument
+	for q in sys.argv[1:]: get_subs(q)
+
-- 
2.43.0