X-Git-Url: https://git.llucax.com/software/blitiri.git/blobdiff_plain/464df08f44fddca976f584e5daaf538adc4f443d..74a71431203c30c3b47ca985ec3d08e1ac38b080:/blitiri.cgi

diff --git a/blitiri.cgi b/blitiri.cgi
index e69789f..6ada91b 100755
--- a/blitiri.cgi
+++ b/blitiri.cgi
@@ -24,6 +24,12 @@ comments_path = "/tmp/blog/comments"
 # default templates. If they're not found, the built-in ones will be used.
 templates_path = "/tmp/blog/templates"
 
+# Path where the cache is stored (must be writeable by the web server);
+# set to None to disable. When enabled, you must take care of cleaning it up
+# every once in a while.
+#cache_path = "/tmp/blog/cache"
+cache_path = None
+
 # URL to the blog, including the name. Can be a full URL or just the path.
 blog_url = "/blog/blitiri.cgi"
 
@@ -40,32 +46,10 @@ author = "Hartmut Kegan"
 # Article encoding
 encoding = "utf8"
 
-# Captcha class
-class Captcha (object):
-	def __init__(self, article):
-		self.article = article
-		words = article.title.split()
-		self.nword = hash(article.title) % len(words) % 5
-		self.answer = words[self.nword]
-		self.help = 'gotcha, damn spam bot!'
-
-	def get_puzzle(self):
-		nword = self.nword + 1
-		if nword == 1:
-			n = '1st'
-		elif nword == 2:
-			n = '2nd'
-		elif nword == 3:
-			n = '3rd'
-		else:
-			n = str(nword) + 'th'
-		return "enter the %s word of the article's title" % n
-	puzzle = property(fget = get_puzzle)
-
-	def validate(self, form_data):
-		if form_data.captcha.lower() == self.answer.lower():
-			return True
-		return False
+# Captcha method to use. At the moment only "title" is supported, but if you
+# are comfortable with Python you can provide your own captcha implementation;
+# see below for details.
+captcha_method = "title"
 
 
 #
@@ -101,6 +85,84 @@ except:
 data_path = os.path.realpath(data_path)
 templates_path = os.path.realpath(templates_path)
 
+
+#
+# Captcha classes
+#
+# They must follow the interface described below.
+#
+# Constructor:
+# 	Captcha(article) -> constructor, takes an article[1] as argument
+# Attributes:
+# 	puzzle -> a string with the puzzle the user must solve to prove he is
+# 	          not a bot (can be raw HTML)
+# 	help -> a string with extra instructions, shown only when the user
+# 	        failed to solve the puzzle
+# Methods:
+#	validate(form_data) -> based on the form data[2], returns True if
+#	                       the user has solved the puzzle successfully
+#	                       (False otherwise).
+#
+# Note you must ensure that the puzzle attribute and validate() method can
+# "communicate" because they are executed in different requests. You can pass a
+# cookie or just calculate the answer based on the article's data, for example.
+#
+# [1] article is an object with all the article's information:
+# 	path -> string
+# 	created -> datetime
+# 	updated -> datetime
+# 	uuid -> string (unique ID)
+# 	title -> string
+# 	author -> string
+# 	tags -> list of strings
+# 	raw_contents -> string in rst format
+# 	comments -> list of Comment objects (not too relevant here)
+# [2] form_data is an object with the form fields (all strings):
+# 	author, author_error
+# 	link, link_error
+# 	captcha, captcha_error
+# 	body, body_error
+# 	action, method
+
+class TitleCaptcha (object):
+	"Captcha that uses the article's title for the puzzle"
+	def __init__(self, article):
+		self.article = article
+		words = article.title.split()
+		self.nword = hash(article.title) % len(words) % 5
+		self.answer = words[self.nword]
+		self.help = 'gotcha, damn spam bot!'
+
+	@property
+	def puzzle(self):
+		nword = self.nword + 1
+		if nword == 1:
+			n = '1st'
+		elif nword == 2:
+			n = '2nd'
+		elif nword == 3:
+			n = '3rd'
+		else:
+			n = str(nword) + 'th'
+		return "enter the %s word of the article's title" % n
+
+	def validate(self, form_data):
+		if form_data.captcha.lower() == self.answer.lower():
+			return True
+		return False
+
+known_captcha_methods = {
+	'title': TitleCaptcha,
+}
+
+# If the configured captcha method was a known string, replace it by the
+# matching class; otherwise assume it's already a class and leave it
+# alone. This way the user can either use one of our methods, or provide one
+# of his/her own.
+if captcha_method in known_captcha_methods:
+	captcha_method = known_captcha_methods[captcha_method]
+
+
 # Default template
 
 default_main_header = """\
@@ -404,26 +466,53 @@ div.section h1 {
 
 """
 
+
+# Cache decorator
+# It only works if the function is pure (that is, its return value depends
+# only on its arguments), and if all the arguments are hashable.
+def cached(f):
+	# do not decorate if the cache is disabled
+	if cache_path is None:
+		return f
+
+	def decorate(*args, **kwargs):
+		hashes = '-'.join( str(hash(x)) for x in args +
+				tuple(kwargs.items()) )
+		fname = 'blitiri.%s.%s.cache' % (f.__name__, hashes)
+		cache_file = os.path.join(cache_path, fname)
+		try:
+			s = open(cache_file).read()
+		except:
+			s = f(*args, **kwargs)
+			open(cache_file, 'w').write(s)
+		return s
+
+	return decorate
+
+
 # helper functions
-def rst_to_html(rst):
+@cached
+def rst_to_html(rst, secure = True):
 	settings = {
 		'input_encoding': encoding,
 		'output_encoding': 'utf8',
 		'halt_level': 1,
 		'traceback':  1,
+		'file_insertion_enabled': secure,
+		'raw_enabled': secure,
 	}
 	parts = publish_parts(rst, settings_overrides = settings,
 				writer_name = "html")
 	return parts['body'].encode('utf8')
 
-def validate_rst(rst):
+def validate_rst(rst, secure = True):
 	try:
-		rst_to_html(rst)
+		rst_to_html(rst, secure)
 		return None
 	except SystemMessage, e:
 		desc = e.args[0].encode('utf-8') # the error string
 		desc = desc[9:] # remove "<string>:"
-		line = int(desc[:desc.find(':')]) # get just the line number
+		line = int(desc[:desc.find(':')] or 0) # get the line number
 		desc = desc[desc.find(')')+2:-1] # remove (LEVEL/N)
 		try:
 			desc, context = desc.split('\n', 1)
@@ -435,20 +524,28 @@ def validate_rst(rst):
 
 def valid_link(link):
 	import re
-	mail_re = r"^[^ \t\n\r@<>()]+@[a-z0-9][a-z0-9\.\-_]*\.[a-z]+$"
 	scheme_re = r'^[a-zA-Z]+:'
+	mail_re = r"^[^ \t\n\r@<>()]+@[a-z0-9][a-z0-9\.\-_]*\.[a-z]+$"
 	url_re = r'^(?:[a-z0-9\-]+|[a-z0-9][a-z0-9\-\.\_]*\.[a-z]+)' \
 			r'(?::[0-9]+)?(?:/.*)?$'
-	scheme = ''
-	rest = link
+
 	if re.match(scheme_re, link, re.I):
 		scheme, rest = link.split(':', 1)
-	if (not scheme or scheme == 'mailto') and re.match(mail_re, rest, re.I):
+		# if we have a scheme and a rest, assume the link is valid
+		# and return it as-is; otherwise (having just the scheme) it
+		# is invalid
+		if rest:
+			return link
+		return None
+
+	# at this point, we don't have a scheme; we will try to recognize some
+	# common addresses (mail and http at the moment) and complete them to
+	# form a valid link, if we fail we will just claim it's invalid
+	if re.match(mail_re, link, re.I):
 		return 'mailto:' + link
-	if not scheme and re.match(url_re, rest, re.I):
-		return 'http://' + rest
-	if scheme:
-		return link
+	elif re.match(url_re, link, re.I):
+		return 'http://' + link
+
 	return None
 
 def sanitize(obj):
@@ -594,25 +691,23 @@ class Comment (object):
 		self._link = ''
 		self._raw_content = 'Removed comment'
 
-
-	def get_author(self):
+	@property
+	def author(self):
 		if not self.loaded:
 			self.load()
 		return self._author
-	author = property(fget = get_author)
 
-	def get_link(self):
+	@property
+	def link(self):
 		if not self.loaded:
 			self.load()
 		return self._link
-	link = property(fget = get_link)
 
-	def get_raw_content(self):
+	@property
+	def raw_content(self):
 		if not self.loaded:
 			self.load()
 		return self._raw_content
-	raw_content = property(fget = get_raw_content)
-
 
 	def set(self, author, raw_content, link = '', created = None):
 		self.loaded = True
@@ -680,6 +775,11 @@ class Comment (object):
 class CommentDB (object):
 	def __init__(self, article):
 		self.path = os.path.join(comments_path, article.uuid)
+		# if comments were enabled after the article was added, we
+		# will need to create the directory
+		if not os.path.exists(self.path):
+			os.mkdir(self.path, 0755)
+
 		self.comments = []
 		self.load(article)
 
@@ -736,37 +836,35 @@ class Article (object):
 		self._raw_content = ''
 		self._comments = []
 
-
-	def get_title(self):
+	@property
+	def title(self):
 		if not self.loaded:
 			self.load()
 		return self._title
-	title = property(fget = get_title)
 
-	def get_author(self):
+	@property
+	def author(self):
 		if not self.loaded:
 			self.load()
 		return self._author
-	author = property(fget = get_author)
 
-	def get_tags(self):
+	@property
+	def tags(self):
 		if not self.loaded:
 			self.load()
 		return self._tags
-	tags = property(fget = get_tags)
 
-	def get_raw_content(self):
+	@property
+	def raw_content(self):
 		if not self.loaded:
 			self.load()
 		return self._raw_content
-	raw_content = property(fget = get_raw_content)
 
-	def get_comments(self):
+	@property
+	def comments(self):
 		if not self.loaded:
 			self.load()
 		return self._comments
-	comments = property(fget = get_comments)
-
 
 	def __cmp__(self, other):
 		if self.path == other.path:
@@ -957,7 +1055,7 @@ def render_comments(article, template, form_data):
 	if not form_data:
 		form_data = CommentFormData()
 	form_data.action = blog_url + '/comment/' + article.uuid + '#comment'
-	captcha = Captcha(article)
+	captcha = captcha_method(article)
 	print template.get_comment_form(article, form_data, captcha.puzzle)
 
 def render_html(articles, db, actyear = None, show_comments = False,
@@ -983,7 +1081,7 @@ def render_artlist(articles, db, actyear = None):
 	print template.get_main_header()
 	print '<h2>Articles</h2>'
 	for a in articles:
-		print '<li><a href="%(url)s/uuid/%(uuid)s">%(title)s</a></li>' \
+		print '<li><a href="%(url)s/post/%(uuid)s">%(title)s</a></li>' \
 			% {	'url': blog_url,
 				'uuid': a.uuid,
 				'title': a.title,
@@ -1054,6 +1152,7 @@ def handle_cgi():
 	atom = False
 	style = False
 	post = False
+	post_preview = False
 	artlist = False
 	comment = False
 
@@ -1063,10 +1162,11 @@ def handle_cgi():
 		atom = path_info == '/atom'
 		tag = path_info.startswith('/tag/')
 		post = path_info.startswith('/post/')
+		post_preview = path_info.startswith('/preview/post/')
 		artlist = path_info.startswith('/list')
 		comment = path_info.startswith('/comment/') and enable_comments
-		if not style and not atom and not post and not tag \
-				and not comment and not artlist:
+		if not style and not atom and not post and not post_preview \
+				and not tag and not comment and not artlist:
 			date = path_info.split('/')[1:]
 			try:
 				if len(date) > 1 and date[0]:
@@ -1080,6 +1180,15 @@ def handle_cgi():
 		elif post:
 			uuid = path_info.replace('/post/', '')
 			uuid = uuid.replace('/', '')
+		elif post_preview:
+			art_path = path_info.replace('/preview/post/', '')
+			art_path = urllib.unquote_plus(art_path)
+			art_path = os.path.join(data_path, art_path)
+			art_path = os.path.realpath(art_path)
+			common = os.path.commonprefix([data_path, art_path])
+			if common != data_path: # something nasty happened
+				post_preview = False
+			art_path = art_path[len(data_path)+1:]
 		elif tag:
 			t = path_info.replace('/tag/', '')
 			t = t.replace('/', '')
@@ -1103,15 +1212,20 @@ def handle_cgi():
 		render_style()
 	elif post:
 		render_html( [db.get_article(uuid)], db, year, enable_comments )
+	elif post_preview:
+		article = Article(art_path, datetime.datetime.now(),
+					datetime.datetime.now())
+		render_html( [article], db, year, enable_comments )
 	elif artlist:
 		articles = db.get_articles()
 		articles.sort(cmp = Article.title_cmp)
 		render_artlist(articles, db)
 	elif comment:
 		form_data = CommentFormData(author.strip().replace('\n', ' '),
-				link.strip().replace('\n', ' '), captcha, body)
+				link.strip().replace('\n', ' '), captcha,
+				body.replace('\r', ''))
 		article = db.get_article(uuid)
-		captcha = Captcha(article)
+		captcha = captcha_method(article)
 		redirect = False
 		valid = True
 		if not form_data.author:
@@ -1132,11 +1246,14 @@ def handle_cgi():
 			form_data.body_error = 'please, write a comment'
 			valid = False
 		else:
-			error = validate_rst(form_data.body)
+			error = validate_rst(form_data.body, secure=False)
 			if error is not None:
 				(line, desc, ctx) = error
-				form_data.body_error = 'error at line %d: %s' \
-						% (line, desc)
+				at = ''
+				if line:
+					at = ' at line %d' % line
+				form_data.body_error = 'error%s: %s' \
+						% (at, desc)
 				valid = False
 		if valid:
 			c = article.add_comment(form_data.author,
@@ -1231,7 +1348,10 @@ def handle_cmd():
 
 
 if os.environ.has_key('GATEWAY_INTERFACE'):
+	i = datetime.datetime.now()
 	handle_cgi()
+	f = datetime.datetime.now()
+	print '<!-- render time: %s -->' % (f-i)
 else:
 	sys.exit(handle_cmd())