]> git.llucax.com Git - software/blitiri.git/blob - blitiri.cgi
Properly translate HTML entities in user inputs when rendering
[software/blitiri.git] / blitiri.cgi
1 #!/usr/bin/env python
2 #coding: utf8
3
4 # blitiri - A single-file blog engine.
5 # Alberto Bertogli (albertito@gmail.com)
6
7 #
8 # Configuration section
9 #
10 # You can edit these values, or create a file named "config.py" and put them
11 # there to make updating easier. The ones in config.py take precedence.
12 #
13
# Directory where entries are stored. Article paths are relative to it, and
# the article index (a file named "db") is kept in this directory too.
data_path = "/tmp/blog/data"

# Path where templates are stored. The files looked up there are header.html,
# footer.html, art_header.html and art_footer.html. Use an empty string for
# the built-in default templates. If they're not found, the built-in ones
# will be used.
templates_path = "/tmp/blog/templates"

# URL to the blog, including the name. Can be a full URL or just the path.
# It is used as the prefix of every link the blog generates.
blog_url = "/blog/blitiri.cgi"

# Style sheet (CSS) URL. Can be relative or absolute. To use the built-in
# default, set it to blog_url + "/style".
css_url = blog_url + "/style"

# Blog title, shown in the page header and in the Atom feed.
title = "I don't like blogs"

# Default author, used for articles that have no "Author:" header.
author = "Hartmut Kegan"

# Article encoding, i.e. the encoding of the article files as handed to
# docutils when rendering them.
encoding = "utf8"
36
37 #
38 # End of configuration
39 # DO *NOT* EDIT ANYTHING PAST HERE
40 #
41
42
43 import sys
44 import os
45 import time
46 import datetime
47 import calendar
48 import zlib
49 import urllib
50 import cgi
51 from docutils.core import publish_parts
52
# Before importing the config, add our cwd to the Python path so a config.py
# placed in the directory the script runs from can be found.
sys.path.append(os.getcwd())

# Load the config file, if there is one. Only a missing module is ignored:
# any other error (for instance a syntax error inside config.py) is allowed
# to propagate instead of silently falling back to the defaults above.
try:
        from config import *
except ImportError:
        pass


# Turn the *_path config variables into absolute, symlink-free paths so that
# relative paths in the configuration are supported.
data_path = os.path.realpath(data_path)
templates_path = os.path.realpath(templates_path)
66
# Default templates
#
# Each template is a "%(name)s"-style format string. They are used when no
# file with the matching name exists in templates_path.

# Page header. Placeholders used: title, fullurl, css_url, url.
default_main_header = """\
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
          "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">

<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<link rel="alternate" title="%(title)s" href="%(fullurl)s/atom"
        type="application/atom+xml" />
<link href="%(css_url)s" rel="stylesheet" type="text/css" />
<title>%(title)s</title>
</head>

<body>

<h1><a href="%(url)s">%(title)s</a></h1>

<div class="content">
"""

# Page footer. Placeholders used: showyear, monthlinks, yearlinks, url.
default_main_footer = """
</div>
<div class="footer">
  %(showyear)s: %(monthlinks)s<br/>
  years: %(yearlinks)s<br/>
  subscribe: <a href="%(url)s/atom">atom</a><br/>
  views: <a href="%(url)s/">blog</a> <a href="%(url)s/list">list</a><br/>
</div>

</body>
</html>
"""

# Per-article header. On top of the page-level variables it uses the ones
# provided by Article.to_vars(): uuid, arttitle, author, tags, and the
# c*/u* fields holding the creation and update date components.
default_article_header = """
<div class="article">
<h2><a href="%(url)s/post/%(uuid)s">%(arttitle)s</a></h2>
<span class="artinfo">
  by %(author)s on <span class="date">

<a class="date" href="%(url)s/%(cyear)d/">%(cyear)04d</a>-\
<a class="date" href="%(url)s/%(cyear)d/%(cmonth)d/">%(cmonth)02d</a>-\
<a class="date" href="%(url)s/%(cyear)d/%(cmonth)d/%(cday)d/">%(cday)02d</a>\
    %(chour)02d:%(cminute)02d</span>
  (updated on <span class="date">
<a class="date" href="%(url)s/%(uyear)d/">%(uyear)04d</a>-\
<a class="date" href="%(url)s/%(uyear)d/%(umonth)d/">%(umonth)02d</a>-\
<a class="date" href="%(url)s/%(uyear)d/%(umonth)d/%(uday)d/">%(uday)02d</a>\
    %(uhour)02d:%(uminute)02d)</span><br/>
  <span class="tags">tagged %(tags)s</span>
</span><br/>
<p/>
<div class="artbody">
"""

# Per-article footer; closes the "artbody" and "article" divs opened by the
# article header. It uses no placeholders.
default_article_footer = """
<p/>
</div>
</div>
"""
128
# Default CSS, served by the script itself under <blog_url>/style.
# The headings use "font-weight: normal": the previous "font-weigth: none"
# was both a misspelled property and an invalid value, so it had no effect.
default_css = """
body {
        font-family: sans-serif;
        font-size: small;
        width: 52em;
}

div.content {
        width: 96%;
}

h1 {
        font-size: large;
        border-bottom: 2px solid #99F;
        width: 100%;
        margin-bottom: 1em;
}

h2 {
        font-size: medium;
        font-weight: normal;
        margin-bottom: 1pt;
        border-bottom: 1px solid #99C;
}

h1 a, h2 a {
        text-decoration: none;
        color: black;
}

span.artinfo {
        font-size: xx-small;
}

span.artinfo a {
        text-decoration: none;
        color: #339;
}

span.artinfo a:hover {
        text-decoration: none;
        color: blue;
}

div.artbody {
        margin-left: 1em;
}

div.article {
        margin-bottom: 2em;
}

div.footer {
        margin-top: 1em;
        padding-top: 0.4em;
        width: 100%;
        border-top: 2px solid #99F;
        font-size: x-small;
}

div.footer a {
        text-decoration: none;
}

/* Articles are enclosed in <div class="section"> */
div.section h1 {
        font-size: small;
        font-weight: normal;
        width: 100%;
        margin-bottom: 1pt;
        border-bottom: 1px dotted #99C;
}

"""
204
205 # helper functions
def rst_to_html(rst):
        """Render the reStructuredText in rst with docutils and return the
        'body' part of the resulting HTML, encoded as UTF-8.

        The input is decoded using the configured article encoding.
        """
        overrides = dict(input_encoding = encoding, output_encoding = 'utf8')
        html_parts = publish_parts(rst, writer_name = "html",
                                settings_overrides = overrides)
        body = html_parts['body']
        return body.encode('utf8')
214
215 def sanitize(obj):
216         if isinstance(obj, basestring):
217                 return cgi.escape(obj, True)
218         return obj
219
220
# find out our URL, needed for syndication
def find_full_url(environ):
        """Build the absolute URL of this script from CGI variables.

        environ is a mapping holding the CGI environment (normally
        os.environ). SERVER_NAME, SERVER_PORT and SCRIPT_NAME are required;
        when any of them is missing (e.g. when run from the command line)
        the placeholder 'Not needed' is returned.

        The scheme is https when the HTTPS variable is set to "on" or "1"
        and http otherwise. The port is left out of the URL when it is the
        default one for the scheme.
        """
        try:
                name = environ['SERVER_NAME']
                port = environ['SERVER_PORT']
                script = environ['SCRIPT_NAME']
        except KeyError:
                return 'Not needed'

        if environ.get('HTTPS', '').lower() in ('on', '1'):
                scheme, default_port = 'https', '443'
        else:
                scheme, default_port = 'http', '80'

        if port == default_port:
                port = ''
        else:
                port = ':' + port
        return '%s://%s%s%s' % (scheme, name, port, script)

full_url = find_full_url(os.environ)
231
232
class Templates (object):
        """Renders the page and article templates.

        A template is taken from "<tpath>/<name>.html" when that file exists
        and from the built-in default otherwise; either way it is filled in
        with %-formatting using the variables collected in self.vars.
        """

        def __init__(self, tpath, db, showyear = None):
                """tpath is the templates directory, db provides the year and
                month links, and showyear is the year whose months are listed
                (the current year when not given or zero)."""
                today = datetime.datetime.now()
                showyear = showyear or today.year

                self.tpath = tpath
                self.db = db

                month_links = ' '.join(db.get_month_links(showyear))
                year_links = ' '.join(db.get_year_links())
                self.vars = {
                        'css_url': css_url,
                        'title': title,
                        'url': blog_url,
                        'fullurl': full_url,
                        'year': today.year,
                        'month': today.month,
                        'day': today.day,
                        'showyear': showyear,
                        'monthlinks': month_links,
                        'yearlinks': year_links,
                }

        def get_template(self, page_name, default_template, extra_vars = None):
                """Return the filled-in template called page_name, falling
                back to default_template when there is no such file.

                extra_vars, if given, is merged over a copy of the common
                variables; self.vars itself is never modified.
                """
                values = self.vars
                if extra_vars is not None:
                        values = dict(self.vars)
                        values.update(extra_vars)

                candidate = '%s/%s.html' % (self.tpath, page_name)
                if not os.path.isfile(candidate):
                        return default_template % values
                return open(candidate).read() % values

        def get_main_header(self):
                """Return the rendered page header."""
                return self.get_template('header', default_main_header)

        def get_main_footer(self):
                """Return the rendered page footer."""
                return self.get_template('footer', default_main_footer)

        def get_article_header(self, article):
                """Return the rendered header for the given article."""
                art_vars = article.to_vars()
                return self.get_template('art_header',
                                default_article_header, art_vars)

        def get_article_footer(self, article):
                """Return the rendered footer for the given article."""
                art_vars = article.to_vars()
                return self.get_template('art_footer',
                                default_article_footer, art_vars)
279
280
class Article (object):
        """A blog article stored as a file below data_path.

        The file starts with "Name: value" header lines, of which Title,
        Author and Tags (comma separated) are recognized. The first empty
        line ends the header and everything after it is the body. The file
        is only read when the title, author, tags or raw_content property
        is first accessed.
        """

        def __init__(self, path, created = None, updated = None):
                """path is relative to data_path; created and updated are
                datetime objects, or None when they are not known."""
                self.path = path
                self.created = created
                self.updated = updated
                # Identifier used in the post URLs: CRC32 of the path, in hex
                self.uuid = "%08x" % zlib.crc32(self.path)

                self.loaded = False

                # loaded on demand; these values are what is shown when the
                # article file cannot be read
                self._title = 'Removed post'
                self._author = author
                self._tags = []
                self._raw_content = ''


        def get_title(self):
                """Return the article title, loading the file if needed."""
                if not self.loaded:
                        self.load()
                return self._title
        title = property(fget = get_title)

        def get_author(self):
                """Return the article author, loading the file if needed."""
                if not self.loaded:
                        self.load()
                return self._author
        author = property(fget = get_author)

        def get_tags(self):
                """Return the article tags, loading the file if needed."""
                if not self.loaded:
                        self.load()
                return self._tags
        tags = property(fget = get_tags)

        def get_raw_content(self):
                """Return the unrendered body, loading the file if needed."""
                if not self.loaded:
                        self.load()
                return self._raw_content
        raw_content = property(fget = get_raw_content)


        def __cmp__(self, other):
                """Order articles by creation time. Articles with the same
                path compare equal, and an article without a creation time
                sorts after one that has it."""
                if self.path == other.path:
                        return 0
                if not self.created:
                        return 1
                if not other.created:
                        return -1
                if self.created < other.created:
                        return -1
                return 1

        def title_cmp(self, other):
                """Comparison function that orders articles by title."""
                return cmp(self.title, other.title)


        def load(self):
                """Read the article file and fill in title, author, tags and
                raw content.

                If the file cannot be read the defaults set in __init__ are
                kept and the article stays marked as not loaded.
                """
                try:
                        f = open(data_path + '/' + self.path)
                        try:
                                raw = f.readlines()
                        finally:
                                f.close()
                except (IOError, OSError):
                        return

                # count ends up as the index of the empty line that closes
                # the header, or len(raw) when there is no such line
                count = 0
                for l in raw:
                        if ':' in l:
                                name, value = l.split(':', 1)
                                name = name.lower()
                                if name == 'title':
                                        self._title = value.strip()
                                elif name == 'author':
                                        self._author = value.strip()
                                elif name == 'tags':
                                        ts = value.split(',')
                                        self._tags = set([t.strip() for t in ts])
                        elif l == '\n':
                                # end of header
                                break
                        count += 1
                self._raw_content = ''.join(raw[count + 1:])
                self.loaded = True

        def to_html(self):
                """Return the article body rendered to HTML."""
                return rst_to_html(self.raw_content)

        def to_vars(self):
                """Return the dictionary of template variables describing
                this article. Title and author are HTML-escaped. Both
                created and updated must be set."""
                return {
                        'arttitle': sanitize(self.title),
                        'author': sanitize(self.author),
                        'date': self.created.isoformat(' '),
                        'uuid': self.uuid,
                        'tags': self.get_tags_links(),

                        'created': self.created.isoformat(' '),
                        'ciso': self.created.isoformat(),
                        'cyear': self.created.year,
                        'cmonth': self.created.month,
                        'cday': self.created.day,
                        'chour': self.created.hour,
                        'cminute': self.created.minute,
                        'csecond': self.created.second,

                        'updated': self.updated.isoformat(' '),
                        'uiso': self.updated.isoformat(),
                        'uyear': self.updated.year,
                        'umonth': self.updated.month,
                        'uday': self.updated.day,
                        'uhour': self.updated.hour,
                        'uminute': self.updated.minute,
                        'usecond': self.updated.second,
                }

        def get_tags_links(self):
                """Return the tags, sorted, as comma separated HTML links to
                the corresponding tag pages."""
                links = []
                for t in sorted(self.tags):
                        links.append('<a class="tag" href="%s/tag/%s">%s</a>' % \
                                (blog_url, urllib.quote(t), sanitize(t) ))
                return ', '.join(links)
400
401
class DB (object):
        """The article index.

        It is a text file with one article per line, in the format

                path (relative to data_path), created (epoch), updated (epoch)

        which is read into memory when the object is created.
        """

        def __init__(self, dbpath):
                """Load the index stored in the file dbpath."""
                self.dbpath = dbpath
                self.articles = []
                self.uuids = {}
                # years, and (year, month) pairs, that have articles
                self.actyears = set()
                self.actmonths = set()
                self.load()

        def get_articles(self, year = 0, month = 0, day = 0, tags = None):
                """Return the articles created on the given year, month and
                day that carry all the given tags. A criterion left at its
                default (0 or None) is not applied."""
                l = []
                for a in self.articles:
                        if year and a.created.year != year: continue
                        if month and a.created.month != month: continue
                        if day and a.created.day != day: continue
                        if tags and not tags.issubset(a.tags): continue

                        l.append(a)

                return l

        def get_article(self, uuid):
                """Return the article with the given uuid; raises KeyError
                if there is none."""
                return self.uuids[uuid]

        def load(self):
                """Read the index file, if it can be opened.

                Lines that do not have the expected three fields, or whose
                timestamps are not valid, are skipped.
                """
                try:
                        f = open(self.dbpath)
                except (IOError, OSError):
                        return

                try:
                        for line in f:
                                # Split from the right, so a path that has
                                # commas in it is read back the way save()
                                # wrote it.
                                try:
                                        path, created, updated = \
                                                line.rsplit(',', 2)
                                        created = datetime.datetime.fromtimestamp(
                                                float(created))
                                        updated = datetime.datetime.fromtimestamp(
                                                float(updated))
                                except (ValueError, OverflowError, OSError):
                                        continue

                                a = Article(path, created, updated)
                                self.uuids[a.uuid] = a
                                self.actyears.add(a.created.year)
                                self.actmonths.add(
                                        (a.created.year, a.created.month))
                                self.articles.append(a)
                finally:
                        f.close()

        def save(self):
                """Write the index to a temporary file and then rename it
                over the index file."""
                tmp_path = self.dbpath + '.tmp'
                f = open(tmp_path, 'w')
                try:
                        for a in self.articles:
                                created = str(time.mktime(a.created.timetuple()))
                                updated = str(time.mktime(a.updated.timetuple()))
                                f.write(a.path + ', ' + created + ', ' +
                                        updated + '\n')
                finally:
                        f.close()
                os.rename(tmp_path, self.dbpath)

        def get_year_links(self):
                """Return a list of HTML links, newest year first, one for
                each year that has articles."""
                return [ '<a href="%s/%d/">%d</a>' % (blog_url, y, y)
                                for y in sorted(self.actyears, reverse = True) ]

        def get_month_links(self, year):
                """Return a list with the twelve abbreviated month names of
                the given year; the months that have articles are HTML links
                to their page."""
                am = [ i[1] for i in self.actmonths if i[0] == year ]
                ml = []
                for i in range(1, 13):
                        name = calendar.month_name[i][:3]
                        if i in am:
                                s = '<a href="%s/%d/%d/">%s</a>' % \
                                        ( blog_url, year, i, name )
                        else:
                                s = name
                        ml.append(s)
                return ml
479
480 #
481 # Main
482 #
483
484
def render_html(articles, db, actyear = None):
        """Print a complete HTML page (CGI header included) showing the
        given articles, each one between its header and footer templates.

        actyear selects the year whose month links appear in the footer.
        """
        tmpl = Templates(templates_path, db, actyear)
        print('Content-type: text/html; charset=utf-8\n')
        print(tmpl.get_main_header())
        for art in articles:
                print(tmpl.get_article_header(art))
                print(art.to_html())
                print(tmpl.get_article_footer(art))
        print(tmpl.get_main_footer())
494
def render_artlist(articles, db, actyear = None):
        """Print an HTML page (CGI header included) listing the given
        articles by title, each one linking to its post page.

        actyear selects the year whose month links appear in the footer.
        """
        template = Templates(templates_path, db, actyear)
        print('Content-type: text/html; charset=utf-8\n')
        print(template.get_main_header())
        print('<h2>Articles</h2>')
        # Only open the list when there is something to put in it: an empty
        # <ul> is not valid XHTML.
        if articles:
                print('<ul>')
        for a in articles:
                # Posts are served under /post/<uuid> (see handle_cgi), and
                # titles come from the article files so they are escaped.
                print('<li><a href="%(url)s/post/%(uuid)s">%(title)s</a></li>' \
                        % {     'url': blog_url,
                                'uuid': a.uuid,
                                'title': sanitize(a.title),
                        })
        if articles:
                print('</ul>')
        print(template.get_main_footer())
508
def render_atom(articles):
        """Print an Atom feed (CGI header included) with the given articles.

        The feed's own update time is the one of the first article, or the
        current time when there are no articles.
        """
        feed_header = """<?xml version="1.0" encoding="utf-8"?>

<feed xmlns="http://www.w3.org/2005/Atom">
 <title>%(title)s</title>
 <link rel="alternate" type="text/html" href="%(url)s"/>
 <link rel="self" type="application/atom+xml" href="%(url)s/atom"/>
 <id>%(url)s</id> <!-- TODO: find a better <id>, see RFC 4151 -->
 <updated>%(updated)sZ</updated>

        """
        feed_entry = """
  <entry>
    <title>%(arttitle)s</title>
    <author><name>%(author)s</name></author>
    <link href="%(url)s/post/%(uuid)s" />
    <id>%(url)s/post/%(uuid)s</id>
    <summary>%(arttitle)s</summary>
    <published>%(ciso)sZ</published>
    <updated>%(uiso)sZ</updated>
    <content type="xhtml">
      <div xmlns="http://www.w3.org/1999/xhtml"><p>
%(contents)s
      </p></div>
    </content>
  </entry>
                """

        if articles:
                updated = articles[0].updated.isoformat()
        else:
                updated = datetime.datetime.now().isoformat()

        print('Content-type: application/atom+xml; charset=utf-8\n')
        print(feed_header % {
                'title': title,
                'url': full_url,
                'updated': updated,
        })

        for art in articles:
                entry_vars = art.to_vars()
                entry_vars['url'] = full_url
                entry_vars['contents'] = art.to_html()
                print(feed_entry % entry_vars)
        print("</feed>")
554
555
def render_style():
        """Print the built-in style sheet, preceded by its CGI header."""
        print('Content-type: text/css\r\n\r\n' + default_css)
559
def _form_int(form, name):
        """Return the first value of the form field name as an integer, or 0
        when the field is missing or is not a number."""
        try:
                return int(form.getfirst(name, 0))
        except ValueError:
                return 0

def _render_not_found():
        """Print a minimal "404 Not Found" CGI response."""
        print('Status: 404 Not Found')
        print('Content-type: text/plain; charset=utf-8\n')
        print('Not found')

def handle_cgi():
        """Serve a CGI request.

        What to show is selected by PATH_INFO:

                /style                  the built-in style sheet
                /atom                   the Atom feed
                /post/<uuid>            a single article
                /tag/<tag>              the articles with that tag
                /list                   the list of all the articles
                /<year>/<month>/<day>   the articles of that date (month
                                        and day are optional)

        The date and tags can also be given with the "year", "month", "day"
        and "tag" query parameters; a date in the path takes precedence.
        Without any filter the 10 newest articles are shown.
        """
        import cgitb; cgitb.enable()

        form = cgi.FieldStorage()
        year = _form_int(form, "year")
        month = _form_int(form, "month")
        day = _form_int(form, "day")
        tags = set(form.getlist("tag"))
        uuid = None
        atom = False
        style = False
        post = False
        artlist = False

        if 'PATH_INFO' in os.environ:
                path_info = os.environ['PATH_INFO']
                style = path_info == '/style'
                atom = path_info == '/atom'
                tag = path_info.startswith('/tag/')
                post = path_info.startswith('/post/')
                artlist = path_info.startswith('/list')
                if post:
                        uuid = path_info.replace('/post/', '')
                        uuid = uuid.replace('/', '')
                elif tag:
                        t = path_info.replace('/tag/', '')
                        t = t.replace('/', '')
                        t = urllib.unquote_plus(t)
                        tags = set((t,))
                elif not style and not atom and not artlist:
                        # The path starts with '/', so the first element of
                        # the split is always empty and is dropped. What is
                        # left are the date components, with or without a
                        # trailing slash.
                        date = path_info.split('/')[1:]
                        try:
                                if len(date) > 0 and date[0]:
                                        year = int(date[0])
                                if len(date) > 1 and date[1]:
                                        month = int(date[1])
                                if len(date) > 2 and date[2]:
                                        day = int(date[2])
                        except ValueError:
                                pass

        db = DB(data_path + '/db')
        if atom:
                articles = db.get_articles(tags = tags)
                articles.sort(reverse = True)
                render_atom(articles[:10])
        elif style:
                render_style()
        elif post:
                try:
                        article = db.get_article(uuid)
                except KeyError:
                        # unknown post id
                        _render_not_found()
                else:
                        render_html( [article], db, year )
        elif artlist:
                articles = db.get_articles()
                articles.sort(cmp = Article.title_cmp)
                render_artlist(articles, db)
        else:
                articles = db.get_articles(year, month, day, tags)
                articles.sort(reverse = True)
                if not year and not month and not day and not tags:
                        articles = articles[:10]
                render_html(articles, db, year)
621
622
def usage():
        """Print the command line help."""
        program = sys.argv[0]
        print('Usage: %s {add|rm|update} article_path' % program)
625
def _find_article(db, art_path):
        """Return the article in db whose path is art_path, or None."""
        for a in db.articles:
                if a.path == art_path:
                        return a
        return None

def handle_cmd():
        """Run the command line interface: add, remove or update (refresh
        the update time of) an article in the index.

        The command and the article path are taken from sys.argv. Returns 0
        on success and 1 on error, to be used as the exit status.
        """
        if len(sys.argv) != 3:
                usage()
                return 1

        cmd = sys.argv[1]
        art_path = os.path.realpath(sys.argv[2])

        # The article must live below data_path. The separator is made part
        # of the prefix so that a sibling directory whose name merely starts
        # like data_path (e.g. "data2" next to "data") is rejected.
        prefix = data_path
        if not prefix.endswith(os.sep):
                prefix += os.sep
        if not art_path.startswith(prefix):
                print("Error: article (%s) must be inside data_path (%s)" % \
                                (art_path, data_path))
                return 1
        # The stored path keeps whatever follows data_path, including the
        # leading separator.
        art_path = art_path[len(data_path):]

        db_path = data_path + '/db'
        if not os.path.isfile(db_path):
                # create an empty index
                open(db_path, 'w').close()
        db = DB(db_path)

        if cmd == 'add':
                if _find_article(db, art_path) is not None:
                        print('Error: article already exists')
                        return 1
                article = Article(art_path, datetime.datetime.now(),
                                        datetime.datetime.now())
                db.articles.append(article)
                db.save()
        elif cmd == 'rm':
                a = _find_article(db, art_path)
                if a is None:
                        print("Error: no such article")
                        return 1
                db.articles.remove(a)
                db.save()
        elif cmd == 'update':
                a = _find_article(db, art_path)
                if a is None:
                        print("Error: no such article")
                        return 1
                a.updated = datetime.datetime.now()
                db.save()
        else:
                usage()
                return 1

        return 0
678
679
# A web server running us as a CGI sets GATEWAY_INTERFACE; without it we
# were started from the command line.
if 'GATEWAY_INTERFACE' not in os.environ:
        sys.exit(handle_cmd())
else:
        handle_cgi()
684
685