#!/usr/bin/env python

import sys
import urllib
import zipfile
import subprocess
import HTMLParser

class SubDivXQuery(object):
	"""Build the URLs needed to search www.subdivx.com for subtitles.

	The class derives from ``object`` so that the ``@property`` descriptors
	below are backed by a new-style class on Python 2.

	The constructor argument ``to_search`` is the text to look for; it is
	stored under the ``buscar`` key of ``self.query``.
	"""

	def __init__(self, to_search):
		self.host = "www.subdivx.com"
		self.page = "/index.php"
		self.down_page = "/bajar.php"
		# Parameters url-encoded into the query string of the search request.
		self.query = dict(
			buscar = to_search,
			accion = 5,
			masdesc = '',
			subtitulos = 1,
			realiza_b = 1,
		)

	@property
	def url(self):
		"""Absolute URL of the search request (host + ``page_uri``)."""
		return 'http://%s%s' % (self.host, self.page_uri)

	@property
	def page_uri(self):
		"""Search page path followed by the url-encoded query string."""
		return self.page + '?' + urllib.urlencode(self.query)

	@property
	def down_uri(self):
		"""Absolute URL of the download page, without any query string."""
		return 'http://' + self.host + self.down_page


class SubDivXHTMLParser(HTMLParser.HTMLParser):
	"""Collect subtitle entries from a SubDivX search-result page.

	Each ``<div id="menu_detalle_buscador">`` starts a new entry: a dict
	appended to ``self.subs``. While an entry is being parsed, text nodes are
	stored in that dict under the field name selected by the preceding tag or
	label ('title', 'desc', 'downloads', 'cds', 'comentarios', 'formato',
	'autor', 'fecha'), and the first matching download link is stored as
	'url'.

	``down_uri`` is the prefix an ``href`` must start with to be taken as the
	entry's download link.
	"""

	IDLE = 1
	HEADER = 2

	# Text labels found in the page -> field that the *next* non-empty text
	# node is stored under.
	_FIELD_LABELS = {
		'Downloads:': 'downloads',
		'Cds:': 'cds',
		'Comentarios:': 'comentarios',
		'Formato:': 'formato',
		'Subido por:': 'autor',
		'el': 'fecha',
	}

	def __init__(self, down_uri):
		HTMLParser.HTMLParser.__init__(self)
		self.subs = []
		self.attr = None
		self.parsing = False
		self.depth = 0
		self.down_uri = down_uri

	def handle_starttag(self, tag, attrs):
		"""Open a new entry or pick the field the next text belongs to."""
		attr_map = dict(attrs)
		elem_id = attr_map.get('id')
		if (tag, elem_id) == ('div', 'menu_detalle_buscador'):
			entry = {}
			self.subs.append(entry)
			self.cur = entry
			self.parsing = True
		if not self.parsing:
			return
		# Parsing is active from here on, so a 'buscador_detalle' div needs
		# no handling of its own.
		if tag == 'div':
			if elem_id == 'buscador_detalle_sub':
				self.attr = 'desc'
		elif tag == 'a':
			if attr_map.get('class') == 'titulo_menu_izq':
				self.attr = 'title'
			else:
				href = attr_map.get('href', '')
				if href.startswith(self.down_uri):
					self.cur['url'] = href
		# Every start tag seen while parsing adds one nesting level; a tag
		# that never gets an end tag therefore keeps depth above zero.
		self.depth += 1

	def handle_endtag(self, tag):
		"""Leave one nesting level; stop parsing once depth is back at 0."""
		if not self.parsing:
			return
		self.depth -= 1
		if not self.depth:
			self.parsing = False

	def handle_data(self, data):
		"""Store text in the pending field, or recognise a field label."""
		if not self.parsing:
			return
		text = data.strip()
		if text and self.attr is not None:
			self.cur[self.attr] = text
			self.attr = None
			return
		field = self._FIELD_LABELS.get(text)
		if field is not None:
			self.attr = field


def _download_count(sub):
	"""Return the download count of *sub* as an int, or 0 if unavailable.

	Non-digit characters (e.g. thousands separators) are ignored so that a
	value such as '1,234' does not break the sorting of the results.
	"""
	digits = ''.join(ch for ch in sub.get('downloads', '') if ch.isdigit())
	return int(digits) if digits else 0


def _extract_zip(fname):
	"""Extract the .srt/.sub members of zip archive *fname* into the cwd."""
	try:
		z = zipfile.ZipFile(fname, 'r')
	except (zipfile.BadZipfile, IOError):
		print('Error unzipping file %s' % fname)
		return
	try:
		z.printdir()
		for fn in z.namelist():
			if not fn.endswith(('.srt', '.sub')):
				continue
			# Refuse member names that could be written outside the
			# current directory.
			if '..' in fn or fn.startswith('/'):
				print('Dangerous file name: %s' % fn)
				continue
			print('Extracting %s ...' % fn)
			z.extract(fn)
	finally:
		z.close()


def _extract_rar(fname):
	"""Extract rar archive *fname* into the cwd with the external rar tool."""
	try:
		status = subprocess.call(['rar', 'x', fname])
	except OSError:
		# The rar executable is missing or could not be started.
		status = -1
	if status != 0:
		print('Error unraring file %s' % fname)


def _download_and_extract(sub_url):
	"""Download the archive at *sub_url* and extract it by Content-Type."""
	zip_exts = ('application/zip',)
	rar_exts = ('application/rar', 'application/x-rar-compressed')
	try:
		try:
			fname, headers = urllib.urlretrieve(sub_url)
		except IOError:
			print('Error downloading %s' % sub_url)
			return
		content_type = headers.get('Content-Type')
		if content_type is None:
			print('No Content-Type!')
			return
		# Compare only the media type: drop parameters such as
		# "; charset=..." and ignore letter case.
		mime = content_type.split(';', 1)[0].strip().lower()
		if mime in zip_exts:
			_extract_zip(fname)
		elif mime in rar_exts:
			_extract_rar(fname)
		else:
			print('Unrecognized file type: %s' % content_type)
	finally:
		# Remove the temporary file left behind by urlretrieve().
		urllib.urlcleanup()


def get_subs(query_str):
	"""Search SubDivX for *query_str* and download every subtitle found.

	The result page is fetched and parsed with SubDivXHTMLParser. The entries
	are then processed from most to least downloaded: a summary of each one
	is printed, its archive is downloaded, and the subtitles are extracted
	into the current directory (zip archives in-process, rar archives through
	the external ``rar`` command). Problems with a single entry are reported
	on stdout and do not stop the remaining entries from being processed.
	"""
	query = SubDivXQuery(query_str)
	parser = SubDivXHTMLParser(query.down_uri)

	url = urllib.urlopen(query.url)
	try:
		for line in url:
			parser.feed(line)
	finally:
		# Release the connection even if feeding the parser fails.
		url.close()

	shown = ('title', 'autor', 'fecha', 'downloads', 'comentarios', 'desc')

	for sub in sorted(parser.subs, key=_download_count, reverse=True):
		# Fields the parser did not find are shown as '?' instead of making
		# the string formatting raise KeyError.
		info = dict.fromkeys(shown, '?')
		info.update(sub)
		print('''\
	- %(title)s (%(autor)s - %(fecha)s - %(downloads)s - %(comentarios)s)
	  %(desc)s
		DOWNLOADING ...
	''' % info)
		if 'url' not in sub:
			print('No download URL!')
			continue
		_download_and_extract(sub['url'])


# Script driver: every command-line argument is treated as an independent
# search query and processed in the order given.
search_terms = sys.argv[1:]
for q in search_terms:
	get_subs(q)