Allow custom sorting when displaying several articles

[software/blitiri.git] / blitiri.cgi
diff --git a/blitiri.cgi b/blitiri.cgi

index d825efc0d0bdd46c086fbc113c67317791c4f519..d709f305658e43d375171040fcbae698df3e4821 100755 (executable)
--- a/blitiri.cgi
+++ b/blitiri.cgi
@@ -24,6 +24,12 @@ comments_path = "/tmp/blog/comments"
  # default templates. If they're not found, the built-in ones will be used.
  templates_path = "/tmp/blog/templates"
  
+# Path where the cache is stored (must be writeable by the web server);
+# set to None to disable. When enabled, you must take care of cleaning it up
+# every once in a while.
+#cache_path = "/tmp/blog/cache"
+cache_path = None
+
  # URL to the blog, including the name. Can be a full URL or just the path.
  blog_url = "/blog/blitiri.cgi"
  
@@ -40,6 +46,15 @@ author = "Hartmut Kegan"
  # Article encoding
  encoding = "utf8"
  
+# Captcha method to use. At the moment only "title" is supported, but if you
+# know Python you can provide your own captcha implementation; see
+# below for details.
+captcha_method = "title"
+
+# How many articles to show in the index
+index_articles = 10
+
+
  #
  # End of configuration
  # DO *NOT* EDIT ANYTHING PAST HERE
@@ -73,6 +88,84 @@ except:
  data_path = os.path.realpath(data_path)
  templates_path = os.path.realpath(templates_path)
  
+
+#
+# Captcha classes
+#
+# They must follow the interface described below.
+#
+# Constructor:
+#      Captcha(article) -> constructor, takes an article[1] as argument
+# Attributes:
+#      puzzle -> a string with the puzzle the user must solve to prove he is
+#                not a bot (can be raw HTML)
+#      help -> a string with extra instructions, shown only when the user
+#              failed to solve the puzzle
+# Methods:
+#      validate(form_data) -> based on the form data[2], returns True if
+#                             the user has solved the puzzle successfully
+#                             (False otherwise).
+#
+# Note you must ensure that the puzzle attribute and validate() method can
+# "communicate" because they are executed in different requests. You can pass a
+# cookie or just calculate the answer based on the article's data, for example.
+#
+# [1] article is an object with all the article's information:
+#      path -> string
+#      created -> datetime
+#      updated -> datetime
+#      uuid -> string (unique ID)
+#      title -> string
+#      author -> string
+#      tags -> list of strings
+#      raw_content -> string in rst format
+#      comments -> list of Comment objects (not too relevant here)
+# [2] form_data is an object with the form fields (all strings):
+#      author, author_error
+#      link, link_error
+#      captcha, captcha_error
+#      body, body_error
+#      action, method
+
+class TitleCaptcha (object):
+       "Captcha that uses the article's title for the puzzle"
+       def __init__(self, article):
+               self.article = article
+               words = article.title.split()
+               self.nword = hash(article.title) % len(words) % 5
+               self.answer = words[self.nword]
+               self.help = 'gotcha, damn spam bot!'
+
+       @property
+       def puzzle(self):
+               nword = self.nword + 1
+               if nword == 1:
+                       n = '1st'
+               elif nword == 2:
+                       n = '2nd'
+               elif nword == 3:
+                       n = '3rd'
+               else:
+                       n = str(nword) + 'th'
+               return "enter the %s word of the article's title" % n
+
+       def validate(self, form_data):
+               if form_data.captcha.lower() == self.answer.lower():
+                       return True
+               return False
+
+known_captcha_methods = {
+       'title': TitleCaptcha,
+}
+
+# If the configured captcha method was a known string, replace it by the
+# matching class; otherwise assume it's already a class and leave it
+# alone. This way the user can either use one of our methods, or provide one
+# of his/her own.
+if captcha_method in known_captcha_methods:
+       captcha_method = known_captcha_methods[captcha_method]
+
+
  # Default template
  
  default_main_header = """\
@@ -102,6 +195,7 @@ default_main_footer = """
    years: %(yearlinks)s<br/>
    subscribe: <a href="%(url)s/atom">atom</a><br/>
    views: <a href="%(url)s/">blog</a> <a href="%(url)s/list">list</a><br/>
+  tags: %(taglinks)s<br/>
  </div>
  
  </body>
@@ -175,6 +269,12 @@ default_comment_form = """
      or <span class="formurlexample">mailto:you@example.com</span>
    </div>
  </div>
+<div class="comformcaptcha">
+  <label for="comformcaptcha">Your humanity proof %(form_captcha_error)s</label>
+  <input type="text" class="comformcaptcha" id="comformcaptcha"
+         name="comformcaptcha" value="%(form_captcha)s" />
+  <div class="comformhelp">%(captcha_puzzle)s</div>
+</div>
  <div class="comformbody">
    <label for="comformbody" class="comformbody">The comment
      %(form_body_error)s</label>
@@ -370,26 +470,53 @@ div.section h1 {
  
  """
  
+
+# Cache decorator
+# It only works if the function is pure (that is, its return value depends
+# only on its arguments), and if all the arguments are hashable.
+def cached(f):
+       # do not decorate if the cache is disabled
+       if cache_path is None:
+               return f
+
+       def decorate(*args, **kwargs):
+               hashes = '-'.join( str(hash(x)) for x in args +
+                               tuple(kwargs.items()) )
+               fname = 'blitiri.%s.%s.cache' % (f.__name__, hashes)
+               cache_file = os.path.join(cache_path, fname)
+               try:
+                       s = open(cache_file).read()
+               except:
+                       s = f(*args, **kwargs)
+                       open(cache_file, 'w').write(s)
+               return s
+
+       return decorate
+
+
  # helper functions
-def rst_to_html(rst):
+@cached
+def rst_to_html(rst, secure = True):
         settings = {
                 'input_encoding': encoding,
                 'output_encoding': 'utf8',
                 'halt_level': 1,
                 'traceback':  1,
+               'file_insertion_enabled': secure,
+               'raw_enabled': secure,
         }
         parts = publish_parts(rst, settings_overrides = settings,
                                 writer_name = "html")
         return parts['body'].encode('utf8')
  
-def validate_rst(rst):
+def validate_rst(rst, secure = True):
         try:
-               rst_to_html(rst)
+               rst_to_html(rst, secure)
                 return None
         except SystemMessage, e:
                 desc = e.args[0].encode('utf-8') # the error string
                 desc = desc[9:] # remove "<string>:"
-               line = int(desc[:desc.find(':')]) # get just the line number
+               line = int(desc[:desc.find(':')] or 0) # get the line number
                 desc = desc[desc.find(')')+2:-1] # remove (LEVEL/N)
                 try:
                         desc, context = desc.split('\n', 1)
@@ -399,10 +526,34 @@ def validate_rst(rst):
                         desc = desc[:-1]
                 return (line, desc, context)
  
+def valid_link(link):
+       import re
+       scheme_re = r'^[a-zA-Z]+:'
+       mail_re = r"^[^ \t\n\r@<>()]+@[a-z0-9][a-z0-9\.\-_]*\.[a-z]+$"
+       url_re = r'^(?:[a-z0-9\-]+|[a-z0-9][a-z0-9\-\.\_]*\.[a-z]+)' \
+                       r'(?::[0-9]+)?(?:/.*)?$'
+
+       if re.match(scheme_re, link, re.I):
+               scheme, rest = link.split(':', 1)
+               # if we have a scheme and a rest, assume the link is valid
+               # and return it as-is; otherwise (having just the scheme) it
+               # is invalid
+               if rest:
+                       return link
+               return None
+
+       # at this point, we don't have a scheme; we will try to recognize some
+       # common addresses (mail and http at the moment) and complete them to
+       # form a valid link, if we fail we will just claim it's invalid
+       if re.match(mail_re, link, re.I):
+               return 'mailto:' + link
+       elif re.match(url_re, link, re.I):
+               return 'http://' + link
+
+       return None
+
  def sanitize(obj):
-       if isinstance(obj, basestring):
-               return cgi.escape(obj, True)
-       return obj
+       return cgi.escape(obj, quote = True)
  
  
  # find out our URL, needed for syndication
@@ -436,6 +587,7 @@ class Templates (object):
                         'showyear': showyear,
                         'monthlinks': ' '.join(db.get_month_links(showyear)),
                         'yearlinks': ' '.join(db.get_year_links()),
+                       'taglinks': ' '.join(db.get_tag_links()),
                 }
  
         def get_template(self, page_name, default_template, extra_vars = None):
@@ -468,9 +620,9 @@ class Templates (object):
                 vars = comment.to_vars()
                 if comment.link:
                         vars['linked_author'] = '<a href="%s">%s</a>' \
-                                       % (comment.link, comment.author)
+                                       % (vars['link'], vars['author'])
                 else:
-                       vars['linked_author'] = comment.author
+                       vars['linked_author'] = vars['author']
                 return self.get_template(
                         'com_header', default_comment_header, vars)
  
@@ -478,9 +630,10 @@ class Templates (object):
                 return self.get_template(
                         'com_footer', default_comment_footer, comment.to_vars())
  
-       def get_comment_form(self, article, form_data):
+       def get_comment_form(self, article, form_data, captcha_puzzle):
                 vars = article.to_vars()
                 vars.update(form_data.to_vars(self))
+               vars['captcha_puzzle'] = captcha_puzzle
                 return self.get_template(
                         'com_form', default_comment_form, vars)
  
@@ -490,12 +643,14 @@ class Templates (object):
  
  
  class CommentFormData (object):
-       def __init__(self, author = '', link = '', body = ''):
+       def __init__(self, author = '', link = '', captcha = '', body = ''):
                 self.author = author
                 self.link = link
+               self.captcha = captcha
                 self.body = body
                 self.author_error = ''
                 self.link_error = ''
+               self.captcha_error = ''
                 self.body_error = ''
                 self.action = ''
                 self.method = 'post'
@@ -504,14 +659,18 @@ class CommentFormData (object):
                 render_error = template.get_comment_error
                 a_error = self.author_error and render_error(self.author_error)
                 l_error = self.link_error and render_error(self.link_error)
+               c_error = self.captcha_error \
+                               and render_error(self.captcha_error)
                 b_error = self.body_error and render_error(self.body_error)
                 return {
                         'form_author': sanitize(self.author),
                         'form_link': sanitize(self.link),
+                       'form_captcha': sanitize(self.captcha),
                         'form_body': sanitize(self.body),
  
                         'form_author_error': a_error,
                         'form_link_error': l_error,
+                       'form_captcha_error': c_error,
                         'form_body_error': b_error,
  
                         'form_action': self.action,
@@ -535,25 +694,23 @@ class Comment (object):
                 self._link = ''
                 self._raw_content = 'Removed comment'
  
-
-       def get_author(self):
+       @property
+       def author(self):
                 if not self.loaded:
                         self.load()
                 return self._author
-       author = property(fget = get_author)
  
-       def get_link(self):
+       @property
+       def link(self):
                 if not self.loaded:
                         self.load()
                 return self._link
-       link = property(fget = get_link)
  
-       def get_raw_content(self):
+       @property
+       def raw_content(self):
                 if not self.loaded:
                         self.load()
                 return self._raw_content
-       raw_content = property(fget = get_raw_content)
-
  
         def set(self, author, raw_content, link = '', created = None):
                 self.loaded = True
@@ -621,6 +778,11 @@ class Comment (object):
  class CommentDB (object):
         def __init__(self, article):
                 self.path = os.path.join(comments_path, article.uuid)
+               # if comments were enabled after the article was added, we
+               # will need to create the directory
+               if not os.path.exists(self.path):
+                       os.mkdir(self.path, 0777)
+
                 self.comments = []
                 self.load(article)
  
@@ -677,51 +839,35 @@ class Article (object):
                 self._raw_content = ''
                 self._comments = []
  
-
-       def get_title(self):
+       @property
+       def title(self):
                 if not self.loaded:
                         self.load()
                 return self._title
-       title = property(fget = get_title)
  
-       def get_author(self):
+       @property
+       def author(self):
                 if not self.loaded:
                         self.load()
                 return self._author
-       author = property(fget = get_author)
  
-       def get_tags(self):
+       @property
+       def tags(self):
                 if not self.loaded:
                         self.load()
                 return self._tags
-       tags = property(fget = get_tags)
  
-       def get_raw_content(self):
+       @property
+       def raw_content(self):
                 if not self.loaded:
                         self.load()
                 return self._raw_content
-       raw_content = property(fget = get_raw_content)
  
-       def get_comments(self):
+       @property
+       def comments(self):
                 if not self.loaded:
                         self.load()
                 return self._comments
-       comments = property(fget = get_comments)
-
-
-       def __cmp__(self, other):
-               if self.path == other.path:
-                       return 0
-               if not self.created:
-                       return 1
-               if not other.created:
-                       return -1
-               if self.created < other.created:
-                       return -1
-               return 1
-
-       def title_cmp(self, other):
-               return cmp(self.title, other.title)
  
  
         def add_comment(self, author, raw_content, link = ''):
@@ -765,7 +911,10 @@ class Article (object):
                 self.loaded = True
  
         def to_html(self):
-               return rst_to_html(self.raw_content)
+               dirname = os.path.dirname
+               post_url = '/'.join(dirname(full_url), data_path, dirname(self.path))
+               rst = self.raw_content.replace('##POST_URL##', post_url)
+               return rst_to_html(rst)
  
         def to_vars(self):
                 return {
@@ -812,6 +961,7 @@ class ArticleDB (object):
                 self.uuids = {}
                 self.actyears = set()
                 self.actmonths = set()
+               self.acttags = set()
                 self.load()
  
         def get_articles(self, year = 0, month = 0, day = 0, tags = None):
@@ -849,6 +999,7 @@ class ArticleDB (object):
                                 datetime.datetime.fromtimestamp(float(l[1])),
                                 datetime.datetime.fromtimestamp(float(l[2])))
                         self.uuids[a.uuid] = a
+                       self.acttags.update(a.tags)
                         self.actyears.add(a.created.year)
                         self.actmonths.add((a.created.year, a.created.month))
                         self.articles.append(a)
@@ -883,6 +1034,12 @@ class ArticleDB (object):
                         ml.append(s)
                 return ml
  
+       def get_tag_links(self):
+               tl = list(self.acttags)
+               tl.sort()
+               return [ '<a href="%s/tag/%s">%s</a>' % (blog_url,
+                               sanitize(t), sanitize(t)) for t in tl ]
+
  #
  # Main
  #
@@ -898,7 +1055,8 @@ def render_comments(article, template, form_data):
         if not form_data:
                 form_data = CommentFormData()
         form_data.action = blog_url + '/comment/' + article.uuid + '#comment'
-       print template.get_comment_form(article, form_data)             ,
+       captcha = captcha_method(article)
+       print template.get_comment_form(article, form_data, captcha.puzzle)
  
  def render_html(articles, db, actyear = None, show_comments = False,
                 redirect =  None, form_data = None):
@@ -923,7 +1081,7 @@ def render_artlist(articles, db, actyear = None):
         print template.get_main_header()
         print '<h2>Articles</h2>'
         for a in articles:
-               print '<li><a href="%(url)s/uuid/%(uuid)s">%(title)s</a></li>' \
+               print '<li><a href="%(url)s/post/%(uuid)s">%(title)s</a></li>' \
                         % {     'url': blog_url,
                                 'uuid': a.uuid,
                                 'title': a.title,
@@ -969,9 +1127,9 @@ def render_atom(articles):
      <published>%(ciso)sZ</published>
      <updated>%(uiso)sZ</updated>
      <content type="xhtml">
-      <div xmlns="http://www.w3.org/1999/xhtml"><p>
+      <div xmlns="http://www.w3.org/1999/xhtml">
  %(contents)s
-      </p></div>
+      </div>
      </content>
    </entry>
                 """ % vars
@@ -982,6 +1140,37 @@ def render_style():
         print 'Content-type: text/css\r\n\r\n',
         print default_css
  
+# Get a dictionary with sort() arguments (key and reverse) by parsing the sort
+# specification format:
+# [+-]?<key>?
+# Where "-" is used to specify reverse order, while "+" is regular, ascending,
+# order (reverse = False). The key value is an Article's attribute name (title,
+# author, created, updated and uuid are accepted), and will be used as key for
+# sorting. If a value is omitted, that value is taken from the default, which
+# should be provided using the same format specification, with the difference
+# that all values must be provided for the default.
+def get_sort_args(sort_str, default):
+       def parse(s):
+               d = dict()
+               if not s:
+                       return d
+               key = None
+               if len(s) > 0:
+                       # accept ' ' as an alias of '+' since '+' is translated
+                       # to ' ' in URLs
+                       if s[0] in ('+', ' ', '-'):
+                               key = s[1:]
+                               d['reverse'] = (s[0] == '-')
+                       else:
+                               key = s
+               if key in ('title', 'author', 'created', 'updated', 'uuid'):
+                       d['key'] = lambda a: getattr(a, key)
+               return d
+       args = parse(default)
+       assert args['key'] is not None and args['reverse'] is not None
+       args.update(parse(sort_str))
+       return args
+
  def handle_cgi():
         import cgitb; cgitb.enable()
  
@@ -990,10 +1179,12 @@ def handle_cgi():
         month = int(form.getfirst("month", 0))
         day = int(form.getfirst("day", 0))
         tags = set(form.getlist("tag"))
+       sort_str = form.getfirst("sort", None)
         uuid = None
         atom = False
         style = False
         post = False
+       post_preview = False
         artlist = False
         comment = False
  
@@ -1003,10 +1194,11 @@ def handle_cgi():
                 atom = path_info == '/atom'
                 tag = path_info.startswith('/tag/')
                 post = path_info.startswith('/post/')
+               post_preview = path_info.startswith('/preview/post/')
                 artlist = path_info.startswith('/list')
                 comment = path_info.startswith('/comment/') and enable_comments
-               if not style and not atom and not post and not tag \
-                               and not comment and not artlist:
+               if not style and not atom and not post and not post_preview \
+                               and not tag and not comment and not artlist:
                         date = path_info.split('/')[1:]
                         try:
                                 if len(date) > 1 and date[0]:
@@ -1020,6 +1212,15 @@ def handle_cgi():
                 elif post:
                         uuid = path_info.replace('/post/', '')
                         uuid = uuid.replace('/', '')
+               elif post_preview:
+                       art_path = path_info.replace('/preview/post/', '')
+                       art_path = urllib.unquote_plus(art_path)
+                       art_path = os.path.join(data_path, art_path)
+                       art_path = os.path.realpath(art_path)
+                       common = os.path.commonprefix([data_path, art_path])
+                       if common != data_path: # something nasty happened
+                               post_preview = False
+                       art_path = art_path[len(data_path)+1:]
                 elif tag:
                         t = path_info.replace('/tag/', '')
                         t = t.replace('/', '')
@@ -1031,42 +1232,64 @@ def handle_cgi():
                         uuid = uuid.replace('/', '')
                         author = form.getfirst('comformauthor', '')
                         link = form.getfirst('comformlink', '')
+                       captcha = form.getfirst('comformcaptcha', '')
                         body = form.getfirst('comformbody', '')
  
         db = ArticleDB(os.path.join(data_path, 'db'))
         if atom:
                 articles = db.get_articles(tags = tags)
-               articles.sort(reverse = True)
-               render_atom(articles[:10])
+               articles.sort(**get_sort_args(sort_str, '-created'))
+               render_atom(articles[:index_articles])
         elif style:
                 render_style()
         elif post:
                 render_html( [db.get_article(uuid)], db, year, enable_comments )
+       elif post_preview:
+               article = Article(art_path, datetime.datetime.now(),
+                                       datetime.datetime.now())
+               render_html( [article], db, year, enable_comments )
         elif artlist:
                 articles = db.get_articles()
-               articles.sort(cmp = Article.title_cmp)
+               articles.sort(**get_sort_args(sort_str, '+title'))
                 render_artlist(articles, db)
-       elif comment:
+       elif comment and enable_comments:
                 form_data = CommentFormData(author.strip().replace('\n', ' '),
-                               link.strip().replace('\n', ' '), body.strip())
+                               link.strip().replace('\n', ' '), captcha,
+                               body.replace('\r', ''))
                 article = db.get_article(uuid)
+               captcha = captcha_method(article)
                 redirect = False
                 valid = True
                 if not form_data.author:
                         form_data.author_error = 'please, enter your name'
                         valid = False
+               if form_data.link:
+                       link = valid_link(form_data.link)
+                       if link:
+                               form_data.link = link
+                       else:
+                               form_data.link_error = 'please, enter a ' \
+                                               'valid link'
+                               valid = False
+               if not captcha.validate(form_data):
+                       form_data.captcha_error = captcha.help
+                       valid = False
                 if not form_data.body:
                         form_data.body_error = 'please, write a comment'
                         valid = False
                 else:
-                       error = validate_rst(form_data.body)
+                       error = validate_rst(form_data.body, secure=False)
                         if error is not None:
                                 (line, desc, ctx) = error
-                               form_data.body_error = 'error at line %d: %s' \
-                                               % (line, desc)
+                               at = ''
+                               if line:
+                                       at = ' at line %d' % line
+                               form_data.body_error = 'error%s: %s' \
+                                               % (at, desc)
                                 valid = False
                 if valid:
-                       c = article.add_comment(author, body, link)
+                       c = article.add_comment(form_data.author,
+                                       form_data.body, form_data.link)
                         c.save()
                         cdb = CommentDB(article)
                         cdb.comments = article.comments
@@ -1077,9 +1300,9 @@ def handle_cgi():
                                 form_data )
         else:
                 articles = db.get_articles(year, month, day, tags)
-               articles.sort(reverse = True)
+               articles.sort(**get_sort_args(sort_str, '-created'))
                 if not year and not month and not day and not tags:
-                       articles = articles[:10]
+                       articles = articles[:index_articles]
                 render_html(articles, db, year)
  
  
@@ -1157,7 +1380,10 @@ def handle_cmd():
  
  
  if os.environ.has_key('GATEWAY_INTERFACE'):
+       i = datetime.datetime.now()
         handle_cgi()
+       f = datetime.datetime.now()
+       print '<!-- render time: %s -->' % (f-i)
  else:
         sys.exit(handle_cmd())