weblog-1.1/.hg_archival.txt0000644000000000000000000000013611063022120016045 0ustar00usergroup00000000000000repo: 00ecfb3367fecf6d8ba7a94c3f995bc789c18d1e node: fb261def7b424555533074cfd617e9de024d683c weblog-1.1/.hgignore0000644000000000000000000000006411063022120014562 0ustar00usergroup00000000000000syntax: glob *.pyc *~ .*.swp build weblog.egg-info weblog-1.1/.hgtags0000644000000000000000000000133011063022120014232 0ustar00usergroup000000000000009bafbb9dfb928172d988390ea61932b610278ea3 WEBLOG_0_1 7053f6c08fab7af1c5b76d78a9bb6e41fe6a8b5a WEBLOG_0_2 ebe752dc0a655b451babdc2acb6027a523ac8474 WEBLOG_0_3 9cc6b91a2fb95946b5443461201c8a57ad301a53 WEBLOG_0_4 85db8e1cb11890a15f38b3d161dc59962e00b135 WEBLOG_0_5 fcd5f323c67112916c0d43d776f52a96064bef53 WEBLOG_0_5 44abff8da985b456545fa393a2f634932400476b WEBLOG_0_5 a0a1dd94b8b2371f45536e90ee03074dae314f71 WEBLOG_0_6 657340b5fa4b2a1747e139809da6e576d7699290 WEBLOG_0_7 f4075497305adf1cada74fa556a227049a2ccae5 WEBLOG_0_8 8377a76875c476b8697cbfc25be7b3d1fe961028 WEBLOG_0_9 2b7a9f4e897d42683ac16491822eddeab8f5b3b7 WEBLOG_1_0 1ed0521ffc52335e6560d2135b0f85dc4aab01b2 WEBLOG_1_0 e54c184ffac370252b0f34933a307cf741f92f97 WEBLOG_1_1 weblog-1.1/COPYING0000644000000000000000000000137011063022120014013 0ustar00usergroup00000000000000Copyright (c) 2007, 2008, Henry Precheur Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. weblog-1.1/README0000644000000000000000000000062011063022120013635 0ustar00usergroup00000000000000Weblog is a web log or blog publisher. It takes structured text files as input and outputs static HTML / RSS files. Weblog aims to be simple and robust. to learn how to install and use weblog please read the text file: doc/weblog.rst If you have docutils installed you can turn it into a HTML file: $ rst2html.py weblog.rst > weblog.html Jinja is needed to use Weblog (http://jinja.pocoo.org) weblog-1.1/TODO0000644000000000000000000000011211063022120013441 0ustar00usergroup000000000000001.2 --- Document Markdown syntax Add full tutorial More and more docs :) weblog-1.1/bin/weblog0000755000000000000000000000524711063022120014744 0ustar00usergroup00000000000000#!/usr/bin/env python # vim:set filetype=python: import os import sys import datetime import logging from shutil import copy from optparse import OptionParser, SUPPRESS_HELP from weblog import command_publish, command_date _COMMANDS = ('publish', 'date') def main(): parser = OptionParser() parser.add_option("-s", "--source_dir", dest="source_dir", default='.', help='The source directory where the blog posts are ' 'located. [default: \'%default\']', metavar="DIR") parser.add_option("-o", "--output_dir", dest="output_dir", default='output', help='The directory where all the generated files are ' 'written. If it does not exist it is created.' '[default: \'%default\']', metavar="DIR") parser.add_option('-c', '--conf', dest='configuration_file', help='The configuration file to use. If the file is not ' 'present in the current directory, the source directory ' 'is searched.' 
' [default: \'%default\']', metavar='FILE', default='weblog.ini') parser.add_option('-q', '--quiet', dest='quiet', default=False, action='store_true', help='Do not output anything except critical error ' 'messages') parser.add_option('--debug', dest='debug', default=False, action='store_true', help=SUPPRESS_HELP) parser.set_usage('%%prog [option] command\n\nCommands:\n %s' % \ '\n '.join(_COMMANDS)) (options, args) = parser.parse_args() if options.debug: logging.basicConfig(level=logging.DEBUG, format='%(levelname)s %(message)s') elif options.quiet: logging.basicConfig(level=logging.ERROR, format='%(message)s') else: logging.basicConfig(level=logging.INFO, format='%(message)s') if not args: logging.warning('Warning: No command specified, assuming \'publish\'.\n' ' To remove this warning, type: %s publish' % sys.argv[0]) command = 'publish' else: command = args.pop(0) if command not in _COMMANDS: parser.error('invalid command \'%s\'' % command) elif command == 'publish': command_publish(args, options) elif command == 'date': command_date(args, options) if __name__ == '__main__': main() weblog-1.1/doc/style.rst0000644000000000000000000001051611063022120015421 0ustar00usergroup00000000000000Customizing Weblog's appearance =============================== By default Weblog does not have a style-sheet thus looks raw. It is possible to make it more appealing by adding a style-sheet. This document details the possibilities of customizing Weblog's visual appearance. Note to the user ~~~~~~~~~~~~~~~~ On Internet content is more important than appearance. Even with the best graphics and the fanciest website possible, if you don't have content your site will be worthless and nobody will look at it. An interesting post will drive people to your Blog. Your choice of color or a custom logo will not. Don't overspent time on design! Getting started --------------- External CSS ~~~~~~~~~~~~ The recommended way of customizing Weblog visual appearance. 
Is via an external CSS style-sheet. Add the following line to ``weblog.ini``:: html_head: extra_files: style.css Create a file named ``style.css`` in the source directory and generate a temporary blog to tweak CSS file:: $ cd source/directory $ touch style.css $ weblog -s . -o temporary_blog Open ``temporary_blog/index.html`` in your browser and change the visual appearance by editing ``temporary_blog/style.css``. Inline CSS ~~~~~~~~~~ This method is also valid, but it makes HTML files bigger. The "External CSS" method is prefered over this one. To have the CSS stylesheet embedded into the pages, create a file named ``style.css`` containing:: Pages structure --------------- Most of Weblog HTML tags are associated with an `id` or a `class`. The following tables show the different tags and class associated with it. Base structure ~~~~~~~~~~~~~~ The structure common to all pages. `header` and `footer` are user-defined. +--------------+ | Body | | | | +----------+ | | | header | | | +----------+ | | | div#main | | | +----------+ | | | footer | | | +----------+ | +--------------+ Listing structure ~~~~~~~~~~~~~~~~~ The structure of a listing page contained in the `main div`. 
+----------------------+ | h1#title | +----------------------+ | p#description | +----------------------+ | List of posts | | | | +------------------+ | | | h2.post-title | | | +------------------+ | | | p.post-header | | | | | | | | +-------------+ | | | | | span.date | | | | | +-------------+ | | | | | span.author | | | | | +-------------+ | | | +------------------+ | | | div.post-content | | | +------------------+ | | | +----------------------+ | hr.footer-ruler | +----------------------+ | div.paginator | | | | +------------------+ | | | a or span + | | | .paginator-link + | | +------------------+ | +----------------------+ Post structure ~~~~~~~~~~~~~~ +------------------+ | h1.post-title | +------------------+ | p.post-header | | | | +-------------+ | | | span.date | | | +-------------+ | | | span.author | | | +-------------+ | +------------------+ | div.post-content | +------------------+ Custom header & footer ---------------------- The custom header and footer make it possible to add a menu bar or logo. To add a custom logo at the top of the blog, create a directory ``html`` in the source directory, and create a file named ``header.html`` in this new directory:: Then edit ``weblog.ini`` and add the following lines:: html_header = html/header.html extra_files = my_fancy_logo.png This insert the content of the file ``html/header.html`` before the blog's title, and copy the file ``my_fancy_logo.png``. CSS resources ------------- Learning and developing with CSS is hard. The CSS syntax tend to be confusing for beginners. The numerous browser incompatibilities makes the designer's work even more complicated. Here is a list of useful resources regarding this subject: * SitePoint_ CSS Reference is helpful if you are a beginner with CSS. It lists all CSS properties and document how well they are supported by the different browsers. * HtmlHelp_ contains a complete HTML 4 reference. .. _HtmlHelp: http://htmlhelp.com/reference/html40/ .. 
_SitePoint: http://reference.sitepoint.com/css .. vim:se tw=80 sw=2 ts=2 et encoding=utf-8: weblog-1.1/doc/weblog.rst0000644000000000000000000002631511063022120015544 0ustar00usergroup00000000000000Weblog manual ============= :Author: Henry Prêcheur :Reviewers: Anis Kadri, Bastien Simondi, Eric Salama Abstract -------- Simple blog publisher. It reads structured text files and generates static HTML / RSS files. Weblog aims to be simple and robust. In this document *Weblog* is the name of the software. The *web log* concept is referred as the more common term *blog*. According to Wikipedia_: A *blog* (a portmanteau of *web log*) is a website where entries are written in chronological order and commonly displayed in reverse chronological order. .. _Wikipedia: http://en.wikipedia.org/wiki/Blog Pre-requirements ~~~~~~~~~~~~~~~~ - Python version 2.5+ - Jinja version 1.1+ or Jinja 2.0+. Learn how to install Jinja at http://jinja.pocoo.org/2/documentation/intro#installation or http://jinja.pocoo.org/documentation/installation. Installation ------------ Download Weblog's latest version at http://henry.precheur.org/weblog/. Extract it:: tar zxf weblog.tar.gz It can be used right away using the helper script ``weblog_run.py``. Or install it using the supplied ``setup.py`` script. Run ``python setup.py --help`` to learn how to use it. Alternatively if easy_install is present, simply type:: easy_install weblog It fetches the latest version of Weblog and installs it. Quick Start ----------- In the following examples ``weblog/`` represents Weblog's installation directory. If you downloaded the source tarball without installing Weblog; Use the helper script ``weblog_run.py`` instead of the ``weblog`` command:: $ python /path/to/weblog/weblog_run.py --help Create a new directory named ``my_blog``. 
The $ sign represents the shell prompt, do not type it!:: $ mkdir my_blog Copy from the Weblog installation directory the file ``weblog.ini`` into ``my_blog``:: $ cp weblog/examples/weblog.ini my_blog ``weblog.ini`` is the configuration file of the blog. Check the configuration file section for more information. Do not worry about it now, no modification is required to get the following examples working. Create a file named ``first_post.html`` in the ``my_blog`` directory:: title: First post author: Me date: 2007-08-25 Hello world! Actually all the post filenames must end with ``.html``. Go in the ``my_blog`` directory and run the Weblog using the publish command:: $ cd my_blog/ $ weblog publish It should create a directory named ``output`` containing the generated files. Look at the results by opening the file ``output/index.html`` in your web-browser. The first 3 lines of the file ``first_post.html`` define the post's parameters. These are standard :RFC:`2822` headers (the headers used in Emails). Only ``title`` is mandatory. ``date`` and ``author`` are optional. If you don't fill these fields, the author is the one specified in ``weblog.ini``, and the post's date is the post file's last modification date. The line ``Hello world!`` is the actual content of the post. Note that a blank line is required between the headers and the content. The content is an HTML block. Use the HTML syntax to format your post content. For example create a second file named ``second_post.html``:: title: Second post author: Me (again!) date: 2007-08-26 Second test post!

© 2007 Me

Regenerate the blog files:: $ weblog publish Reload the page in your browser. You should see a second post with some formating. The default post file encoding is ASCII. To use a different encoding specify it via the field ``encoding``:: title: Encoding test date: 2007-11-5 encoding: latin-1 Here you can put some ISO-8856-1 text ... Specify the default encoding in ``weblog.ini``, to avoid setting the encoding field for every file. While writing your blog post, don't bother about the ``date`` field immediately. Weblog automatically sets the date to the filename's last modification time. A good practice though is to set the date when the post gets published. By doing so the date won't get changed if the file gets copied. To set the date of a post, use the command ``date``:: $ date Mon Apr 14 00:10:44 PDT 2008 $ cat my_blog_post.html title: My blog post This is a blog post without any date. $ weblog date my_blog_post.html Setting date to 2008-04-14 00:12:22 in file my_blog_post.html $ cat my_blog_post.html title: My blog post date: 2008-04-14 00:12:22 This is a blog post without any date. $ weblog date my_blog_post 2008-5-15 Setting date to 2008-05-15 in file my_blog_post.html $ cat my_blog_post.html title: My blog post date: 2008-05-15 This is a blog post without any date. The ``date`` command accepts 3 formats as argument: - YEAR-MONTH-DAY (2008-01-31) - YEAR-MONTH-DAY HOUR:MINUTE (2008-01-31 16:45) - YEAR-MONTH-DAY HOUR:MINUTE:SECONDS (2008-01-31 16:45:14) For conciseness the ``date`` command uses aliases to specify commonly used date: - now - today (like now but only set the date, not the time) - tomorrow (now + 24 hours) - next_day (like tomorrow but only sets the date, not the time) Encoding and escaping --------------------- Weblog tries to make sure its output is always *correct*. Non-ASCII characters, are converted to HTML entities so you don't have to worry about it. The output is *never* encoded into ISO-8856-1, UTF-8 or another non-ASCII encoding. 
Encoding conversions are not so simple in practice. By doing only one conversion to the simplest encoding possible, a lot of problems are solved. The content of the post is not escaped. The title and the date of the post are escaped. The title ``Hello World`` is escaped. HTML tags appear, and no formating is applied to ``world``. The original text "Hello World" appears instead of "Hello *World*", It is possible to override this by specifying ``raw`` as the encoding. Using the ``raw`` encoding nothing is escaped or converted, but you must make sure all characters are ASCII characters:: title: Non-escaped title author: Me <me@my_weblog.org> encoding: raw If the ``raw`` encoding is used, all the characters must be ASCII characters. Otherwise an error is reported. Attaching a file to a post -------------------------- To attach files like images to a blog post, use the field ``files``:: title: Attach a file files: picture.png directory/file a picture a file It will copy ``picture.png`` and ``directory/file``. If ``directory`` does not exist, it will be created. How URI's are handled --------------------- Relative links (````) are rewritten in the RSS file and in some HTML files. In the RSS file ``base_url`` is prepended to the link to make sure it always points to the correct URI. Absolute links (````) are not rewritten. It should always point to the correct location regardless of the context. Note that Weblog considers ``/`` as the root directory. If ``base_url`` is ``http://example.com/``; ``test.html`` and ``/test.html`` are both rewritten to ``http://example.com/test.html``. Command line parameters ----------------------- Usage: weblog [options] Options: -h, --help show this help message and exit -s DIR, --source-dir=DIR The source directory where the blog posts and the file weblog.ini are located -o DIR, --output-dir=DIR The directory where all the generated files are written. If it does not exist it is created. 
-q, --quiet Do not output anything except critical error messages Configuration file ------------------ All configuration options are in the ``weblog`` section. Learn more about the format of the configuration file: http://docs.python.org/lib/module-ConfigParser.html. A sample configuration file:: [weblog] title: Blog's title url: http://example.com/ description: A sample blog. source_dir: path/to/my/posts output_dir: path/to/output/directory encoding: latin-1 author: Me Fields description ~~~~~~~~~~~~~~~~~~ title The blog's title. It appears at the top of the homepage and in the page's title. This field is mandatory. url The base URL of your blog. For example ``http://my-host.com/my-weblog/``. It is used to generate the absolute URL's to your blog. This field is mandatory. description A short description of your blog. Like "My favorite books reviews", or "Dr. Spock, publications about electronics". Note that it is possible to use multiple lines:: description: My blog about configuration files. The description is merged to a single line; ``My blog about configuration files.``. This field is mandatory. source_dir The directory containing the file ``weblog.ini``, the post files and possibly the ``templates`` directory. By default the current directory. output_dir The output directory. Generated files are put there. By default ``output``. encoding The default post file encoding. Default ``ASCII``. It is overridden by the ``encoding`` field in the post file. author The default author. It is overridden by the ``author`` field in the post file. post_per_page The number of post displayed per listing page. Default is 10. feed_limit The maximum number of post to be included in the Feed file. The most recent posts are the ones included. Default is 10. Note: rss_limit has been renamed to feed_limit. html_head Additional information for the ```` section. Useful to add custom CSS style sheets. Can be a string or a filename. 
If a file with this name exists in the source directory then it is read. Else it is considered as a string. The result is processed using Jinja. Use the variable ``top_dir`` to link to external files. It contains the path to the top directory of the blog. Examples:: html_head= html_head={{ top_dir }}my_stylesheet.css html_header Additional content located just before the blog content. Can be a string or a filename. (See html_head above) Useful to add a logo or a search box at the top. html_footer Additional content located just after the blog content. Can be a string or a filename. (See html_head above) Useful to add ... A footer! extra_files Additional files to be copied. Typically used to copy CSS style sheets and/or pictures for the blog graphic design. Files are copied into `output_dir`. The path is not preserved: The file `style/weblog.css` gets copied into `output_dir/weblog.css` not into `output_dir/style/weblog.css`. This behavior is likely to change in the future. Tips on Uploading ----------------- rsync_ is a useful tool to upload files generated by Weblog. To make sure rsync does not change the last modification time of the files that did not change, use the following:: rsync --compress --checksum --recursive path/to/blog remote_host:public/dir/ Accurate last modification time makes efficient caching possible. .. _rsync: http://samba.anu.edu.au/rsync/ Need more help? --------------- Don't hesitate to ask questions about Weblog: http://groups.google.com/group/weblog-users or weblog-users@googlegroups.com .. vim:se tw=80 sw=2 ts=2 et encoding=utf-8: weblog-1.1/examples/enconding.html0000644000000000000000000000055111063022120017430 0ustar00usergroup00000000000000title: Weblog encode le français! author: Henry Prêcheur encoding: latin-1 date: 2007-10-01

Weblog encode maintenant le texte correctement! Des caractéres tels que: È, Õ ou Ä sont maintenant bien encodés!
Français, Español & Deutsh :)

The encoding of the file is ISO-8859-1 or latin-1.

weblog-1.1/examples/first_post.html0000644000000000000000000000007411063022120017660 0ustar00usergroup00000000000000title: First post author: Me date: 2007-08-25 Hello world! weblog-1.1/examples/second_post.html0000644000000000000000000000020711063022120020002 0ustar00usergroup00000000000000title: Second post date: 2007-08-26 Second test post!

The author lastname is Prêcheur

weblog-1.1/examples/utf-8.html0000644000000000000000000000016111063022120016424 0ustar00usergroup00000000000000title: Some UTF-8, ç ä é ö ó date: 2008-1-1 encoding: UTF-8 Test post with UTF-8 inside ... ç ä é ö ó weblog-1.1/examples/w3_steely_style.css0000644000000000000000000000036011063022120020444 0ustar00usergroup00000000000000body { text-align: center; /* for IE 4+ */ } div#main { margin: 0 auto; text-align: left; /* counter the body center */ width: 42em; max-width: 90%; } p.weblog-ad { margin: 0 auto; text-align: left !important; } weblog-1.1/examples/weblog.ini0000644000000000000000000000027011063022120016554 0ustar00usergroup00000000000000[weblog] title=Sample blog url=http://blog.sample.org description=Brief description of this sample blog. Do multiline, this way! encoding=UTF-8 author=Me weblog-1.1/examples/weblog_w3_steely_css.ini0000644000000000000000000000051611063022120021425 0ustar00usergroup00000000000000[weblog] title=Sample blog url=http://blog.sample.org description=Brief description of this sample blog. author=Me html_head= extra_files=w3_steely_style.css weblog-1.1/setup.cfg0000644000000000000000000000004711063022120014601 0ustar00usergroup00000000000000[nosetests] verbosity=3 with-doctest=1 weblog-1.1/setup.py0000644000000000000000000000265611063022120014502 0ustar00usergroup00000000000000try: from setuptools import setup except: from distutils.core import setup import os import weblog f = open(os.path.join(os.path.dirname(__file__), 'doc', 'weblog.rst')) # The long description has to be ascii encoded ... 
long_description = f.read().strip().decode('utf-8').encode('ascii', 'replace') f.close() setup(name="weblog", version=weblog.__version__, packages=['weblog'], package_data={'weblog': ['templates/*.tmpl']}, scripts=['bin/weblog'], requires=['Jinja2 (>=2.0)'], install_requires=['Jinja2'], data_files=[('doc', ['doc/weblog.rst', 'doc/style.rst'])], # unzip the egg so we can access to documentation & templates zip_safe = False, # metadata for upload to PyPI author = 'Henry Precheur', author_email = 'henry@precheur.org', description = ('Simple blog publisher. It reads structured text ' 'files and generates static HTML / RSS files. Weblog ' 'aims to be simple and robust.'), long_description=long_description, license = "ISCL", keywords = "weblog blog journal diary atom", url = "http://henry.precheur.org/weblog/", classifiers=[ 'Development Status :: 5 - Production/Stable', 'Topic :: Internet :: WWW/HTTP :: Dynamic Content :: News/Diary', 'Intended Audience :: End Users/Desktop', 'Programming Language :: Python', ]) weblog-1.1/test.py0000644000000000000000000003202011063022120014305 0ustar00usergroup00000000000000import os import shutil import tempfile import unittest import email import datetime from textwrap import dedent # make sample configuration files more readable. 
from StringIO import StringIO from optparse import Values from weblog import Post, PostError, jinja_environment from weblog.publish import load_post_list, generate_index_listing from weblog.date import command_date from weblog.publish import command_publish from weblog.load import load_configuration, ConfigurationError def _file(string): return StringIO(dedent(string)) class TestSimpleLoad(unittest.TestCase): def test_load_post_list(self): post_list = load_post_list('test/simple/') self.assertEqual(len(post_list), 3) sorted_list = sorted(post_list) self.assertEqual(sorted_list[0].title, 'post1') self.assertEqual(sorted_list[1].title, 'post2') self.assertEqual(sorted_list[2].title, 'post3') def test_load_post_list_encoding_failure(self): Post.DEFAULT_ENCODING = 'ascii' self.assertRaises(PostError, load_post_list, 'test/encoding/') def test_load_post_list_encoding(self): Post.DEFAULT_ENCODING = 'UTF-8' post_list = load_post_list('test/encoding/') self.assertEqual(len(post_list), 2) sorted_list = sorted(post_list) self.assertEqual(sorted_list[0].title, u'UTF-8 post \xd6\xc9\xc8\xc4 ...') self.assertEqual(sorted_list[0].content, u'\xd6\xe9\xe8\xe4\n') self.assertEqual(sorted_list[1].title, u'latin post \xd6\xc9\xc8\xc4 ...') self.assertEqual(sorted_list[1].content, u'\xd6\xe9\xe8\xe4\n') class TestPost(unittest.TestCase): def test_simple(self): sample_post = '''\ title: test date: 2008-1-1 author: test author encoding: ascii test.''' post = Post(_file(sample_post)) self.assertEqual(post.title, u'test') self.assertEqual(post.date, datetime.date(2008, 1, 1)) self.assertEqual(post.author, u'test author') self.assertEqual(post.encoding, 'ascii') self.assertEqual(post.content, u'test.') def test_encoding(self): sample_post = u'''\ title: Test UTF-8 \xdcTF-8 ? 
author: Henry Pr\xeacheur encoding: utf8 blah \xdcTF-8.'''.encode('utf8') # convert to str post = Post(_file(sample_post)) self.assertEqual(post.title, u'Test UTF-8 \xdcTF-8 ?') self.assertEqual(post.author, u'Henry Pr\xeacheur ') self.assertEqual(post.encoding, u'utf8') self.assertEqual(post.content, u'blah \xdcTF-8.') def test_no_payload(self): sample_post = 'title: no payload\ndate: 2008-1-1' try: Post(_file(sample_post)) except PostError, e: self.assertEqual(e.args, (': does not have content',)) else: self.failUnless(False) # Should not be there def test_bad_encoding(self): sample_post = ('title: bad encoding\ndate: 2008-1-1\n' 'encoding: bad-encoding\n\ntest') try: Post(_file(sample_post)) except PostError, e: self.assertEqual(e.args, (': unknown encoding: ' 'bad-encoding',)) else: self.failUnless(False) # Should not be there self.assertRaises(PostError, Post, _file(sample_post)) def test_bad_date(self): sample_post = 'title: bad encoding\ndate: 20 bad date 08-1-1\n\ntest' try: Post(_file(sample_post)) except PostError, e: self.assertEqual(e.args, (": Unable to parse date " "'20 bad date 08-1-1'\n" "(Use YYYY-MM-DD [[HH:MM]:SS] format)",)) else: self.failUnless(False) # Should not be there def test_markdown(self): sample_post = ('title: markdown\ndate: 2008-9-12\n\n' '*boo*\n\n----\nblah') post = Post(_file(sample_post), markup='markdown') self.assertEqual(post.get_html(), (u'

boo

\n\n' '
\n\n

blah

\n')) self.assertEqual(post.get_xhtml(), (u'

boo

\n\n' '
\n\n

blah

\n')) def test_html(self): sample_post = 'title: markdown\ndate: 2008-9-12\n\n

boo
blah

' post = Post(_file(sample_post), markup='html') self.assertEqual(post.get_html(), u'

boo
blah

') self.assertEqual(post.get_xhtml(), u'

boo
blah

') class TestJinja(unittest.TestCase): env = jinja_environment(os.path.dirname(__file__)) def test_renderstring(self): template = self.env.\ from_string('Hello {{ string_template|renderstring }}!') self.assertEqual(template.render(dict(string_template=('{{ foo }} ' 'world'), foo='crazy')), u'Hello crazy world!') def test_renderstring_empty(self): template = self.env.\ from_string('Hello {{ string_template|renderstring }}!') self.assertEqual(template.render(dict(string_template='', foo='crazy')), u'Hello !') def test_format_date_(self): template = self.env.from_string('{{ d|format_date }}') self.assertEqual(template.render(dict(d=datetime.date(2008, 7, 21))), '2008-07-21') self.assertEqual(template.render(dict(d=datetime.datetime(2008, 7, 21, 21, 42, 12, 123))), '2008-07-21 21:42:12') self.assertRaises(TypeError, template.render, dict(d=12)) class TestGeneration(unittest.TestCase): env = jinja_environment(os.path.dirname(__file__)) def setUp(self): self.tempdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tempdir) def test_generate_listing_empty(self): generate_index_listing(10, self.tempdir, self.env.get_template('index.html.tmpl'), list(), dict(title='test', url='http://test.net', description='test')) def test_generate_listing(self): post1 = 'title: post1\ndate: 2008-02-04\n\npost 1' post2 = ('title: post2\ndate: 2008-01-18\nauthor: test@test.com\n\n' 'post 2') post_list = [Post(StringIO(post1)), Post(StringIO(post2))] generate_index_listing(10, self.tempdir, self.env.get_template('index.html.tmpl'), post_list, dict(title='test', url='http://test.net', description='test')) def test_date(self): filename = os.path.join(self.tempdir, 'set_date.html') # First test a message without any date defined def file_without_date(): open(filename, 'w').write('title: Some title\n\nSome content') file_without_date() command_date([filename, '2008-1-1'], None) message = email.message_from_file(open(filename)) self.assert_('date' in message) 
self.assertEqual(message['date'], str(datetime.date(2008, 1, 1))) # Then test a file which has already a date def file_with_date(): open(filename, 'w').write('title: Some title\ndate: 2008-12-31\n' '\nSome content') file_with_date() command_date([filename, '2008-1-1'], None) message = email.message_from_file(open(filename)) self.assert_('date' in message) self.assertEqual(message['date'], str(datetime.date(2008, 1, 1))) # Test aliases for alias in ('now', 'today', 'tomorrow', 'next_day'): file_without_date() command_date([filename, alias], None) message = email.message_from_file(open(filename)) self.assert_('date' in message) def _test_publish(self, dirname): options = Values(dict(source_dir=os.path.join(os.path.\ dirname(__file__), 'test', dirname), output_dir=self.tempdir, configuration_file='weblog.ini', debug=False)) command_publish(None, options) def test_publish_empty(self): self._test_publish('empty') def test_publish_encoding(self): self._test_publish('encoding') def test_publish_full_url(self): self._test_publish('full_url') def test_publish_simple(self): self._test_publish('simple') class TestConfiguration(unittest.TestCase): def test_empty(self): self.assertRaises(ConfigurationError, load_configuration, _file('')) def test_bad_encoding(self): conf = '''\ [weblog] title=test author=Henry Pr\xc3\xaacheur url=http://example.com/''' # Default encoding is ascii self.assertRaises(ConfigurationError, load_configuration, _file(conf)) def test_encoding(self): conf = u'''\ [weblog] title=test author=Henry Pr\xeacheur encoding=utf8 url=http://example.com/'''.encode('utf8') d = load_configuration(_file(conf)) self.assertEqual(d['author'], u'Henry Pr\xeacheur ') def test_load_simple(self): sample_configuration = '''\ [weblog] title = Test title url = http://example.com description = Example blog''' self.assertEqual(load_configuration(_file(sample_configuration)), dict(title=u'Test title', url=u'http://example.com/', feed_limit=10, description=u'Example blog', 
post_per_page=10)) def test_load_no_title(self): sample_configuration = '''\ [weblog] url = http://example.com description = Example blog''' self.assertRaises(ConfigurationError, load_configuration, _file(sample_configuration)) def test_load_no_title(self): sample_configuration = '''\ [weblog] title = dummy title description = Example blog''' self.assertRaises(ConfigurationError, load_configuration, _file(sample_configuration)) def test_load_feed_limit(self): sample_configuration = '''\ [weblog] title = Test title url = http://example.com description = Example blog''' c = load_configuration(_file(sample_configuration)) self.assertEqual(c['feed_limit'], 10) sample_configuration = '''\ [weblog] title = Test title url = http://example.com description = Example blog feed_limit = 42''' c = load_configuration(_file(sample_configuration)) self.assertEqual(c['feed_limit'], 42) sample_configuration = '''\ [weblog] title = Test title url = http://example.com description = Example blog feed_limit = not_a_number''' self.assertRaises(ConfigurationError, load_configuration, _file(sample_configuration)) def test_load_post_per_page(self): sample_configuration = '''\ [weblog] title = Test title url = http://example.com description = Example blog''' c = load_configuration(_file(sample_configuration)) self.assertEqual(c['post_per_page'], 10) sample_configuration = '''\ [weblog] title = Test title url = http://example.com description = Example blog post_per_page = 42''' c = load_configuration(_file(sample_configuration)) self.assertEqual(c['post_per_page'], 42) sample_configuration = '''\ [weblog] title = Test title url = http://example.com description = Example blog post_per_page = not_a_number''' self.assertRaises(ConfigurationError, load_configuration, _file(sample_configuration)) if __name__ == '__main__': import nose nose.main() weblog-1.1/test/empty/weblog.ini0000644000000000000000000000014411063022120017053 0ustar00usergroup00000000000000[weblog] title=Test blog 
url=http://blog.test.org description=Test blog author=test weblog-1.1/test/encoding/latin-1.html0000644000000000000000000000010411063022120017652 0ustar00usergroup00000000000000title: latin post ÖÉÈÄ ... date: 2008-02-04 encoding: latin-1 Öéèä weblog-1.1/test/encoding/utf-8.html0000644000000000000000000000007211063022120017354 0ustar00usergroup00000000000000title: UTF-8 post ÖÉÈÄ ... date: 2008-02-03 Öéèä weblog-1.1/test/encoding/weblog.ini0000644000000000000000000000016311063022120017504 0ustar00usergroup00000000000000[weblog] title=Test blog url=http://blog.test.org description=Test blog author=test encoding=utf-8 weblog-1.1/test/full_url/utf-8.html0000644000000000000000000000025011063022120017410 0ustar00usergroup00000000000000title: UTF-8 post ÖÉÈÄ ... date: 2008-02-03 Öéèä
äyÔÀ Weblog weblog-1.1/test/full_url/weblog.ini0000644000000000000000000000016311063022120017542 0ustar00usergroup00000000000000[weblog] title=Test blog url=http://blog.test.org description=Test blog author=test encoding=utf-8 weblog-1.1/test/simple/post1.html0000644000000000000000000000004511063022120017162 0ustar00usergroup00000000000000title: post1 date: 2007-01-01 post1 weblog-1.1/test/simple/post2.html0000644000000000000000000000004411063022120017162 0ustar00usergroup00000000000000title: post2 date: 2007-6-15 post2 weblog-1.1/test/simple/post3.html0000644000000000000000000000004511063022120017164 0ustar00usergroup00000000000000title: post3 date: 2007-12-31 post3 weblog-1.1/test/simple/weblog.ini0000644000000000000000000000014411063022120017206 0ustar00usergroup00000000000000[weblog] title=Test blog url=http://blog.test.org description=Test blog author=test weblog-1.1/weblog/__init__.py0000644000000000000000000000153011063022120016346 0ustar00usergroup00000000000000from load import load_configuration, load_post_list from post import Post, PostError from _jinja_environment import jinja_environment from html_full_url import html_full_url from publish import command_publish from date import command_date import listing __author__ = 'Henry Precheur ' __version__ = '1.1' __license__ = 'ISCL' __all__ = ('Post', 'PostError', 'listing', 'jinja_environment', 'load_configuration', 'load_post_list', 'html_full_url', 'command_publish', 'command_date') def main(): import doctest import utils import post import listing import html_full_url import date doctest.testmod(utils) doctest.testmod(post) doctest.testmod(listing) doctest.testmod(html_full_url) doctest.testmod(date) doctest.testmod() if __name__ == '__main__': main() weblog-1.1/weblog/_jinja_environment.py0000644000000000000000000000454011063022120020471 0ustar00usergroup00000000000000import os import sys import datetime from utils import format_date import rfc3339 try: from jinja2 import Environment, FileSystemLoader, 
def jinja_environment(source_dir):
    """
    Build the Jinja environment. Setup all template loaders.

    Templates are looked up, in this order, in:

    1. ``<source_dir>/templates``
    2. ``<sys.path[0]>/weblog/templates``
    3. the ``templates`` directory of the ``weblog`` package

    The filters ``renderstring``, ``format_date``, ``rfc3339`` and ``decode``
    are registered on the returned environment.
    """
    TEMPLATE_DIR = 'templates'
    fs_loader = FileSystemLoader(os.path.join(source_dir, TEMPLATE_DIR))
    fs_app_loader = FileSystemLoader(os.path.join(sys.path[0], 'weblog',
                                                  TEMPLATE_DIR))
    # if setuptools is present use the loader else fake it.
    try:
        # pkg_resources is only imported to detect setuptools.
        import pkg_resources
        # Must come from jinja2 like every other loader used here; importing
        # it from the Jinja 1 package ``jinja`` either failed (silently
        # disabling the package loader) or mixed two incompatible versions.
        from jinja2 import PackageLoader
    except ImportError:
        pkg_loader = FileSystemLoader(os.path.join(os.path.dirname(__file__),
                                                   TEMPLATE_DIR))
    else:
        pkg_loader = PackageLoader('weblog', TEMPLATE_DIR)
    choice_loader = ChoiceLoader([fs_loader, fs_app_loader, pkg_loader])
    env = Environment(loader=choice_loader, trim_blocks=True)
    env.filters['renderstring'] = renderstring
    env.filters['format_date'] = format_date_
    env.filters['rfc3339'] = rfc3339_
    env.filters['decode'] = decode
    return env
# Ignore http:// ftp:// mailto: javascript: ...
_scheme_regex = re.compile(r'\w+:')


def internal_url(url):
    '''
    Returns True if ``url`` refers to an internal resource, i.e. it does not
    start with a scheme such as ``http:`` or ``mailto:``.

    >>> internal_url('http://www.google.ca/')
    False
    >>> internal_url('mailto:me@example.com')
    False
    >>> internal_url('javascript:return false;')
    False
    >>> internal_url('/pic.jpg')
    True
    >>> internal_url('')
    True
    '''
    return _scheme_regex.match(url) is None
Prepending ``base_url`` to them:: >>> p = FullUrlHtmlParser('http://www.example.com') >>> p.feed(u'') >>> p.get_value() u"" Non-external resource are ignored:: >>> p = FullUrlHtmlParser('http://www.example.com') >>> p.feed('') >>> p.get_value() u"" A more complex example:: >>> p.reset() >>> p.feed(r""" ... ... foo ... ... some random text. ... bar ... »~ ... ... ... More ..........""") >>> print p.get_value() #doctest: +NORMALIZE_WHITESPACE foo some random text. bar »~ More .......... ''' def __init__(self, base_url): UTF8HTMLParser.__init__(self) self.base_url = base_url.rstrip('/') @staticmethod def html_attrs(attrs): ''' >>> FullUrlHtmlParser.html_attrs((('src', 'pic.jpg'), ('alt', 'pic'))) u"src='pic.jpg' alt='pic'" >>> FullUrlHtmlParser.html_attrs(list()) u'' ''' return u' '.join(u'%s=\'%s\'' % (k, v) for k, v in attrs) def make_full_url(self, attr, attrs): ''' Change ``attrs[attr]`` from a relative URI to an absolute URI. >>> p = FullUrlHtmlParser('http://www.example.com') >>> tuple(p.make_full_url('src', (('src', 'page'), ('foo', 'bar')))) (('src', 'http://www.example.com/page'), ('foo', 'bar')) >>> tuple(p.make_full_url('src', tuple())) () ''' for key, value in attrs: if key == attr and internal_url(value): yield (key, self.base_url + '/' + value) else: yield (key, value) def rewrite_tag(self, tag, attrs, endtag=u''): ''' >>> p = FullUrlHtmlParser('http://www.example.com') >>> p.rewrite_tag('a', (('href', 'foo'),)) u"" >>> p.rewrite_tag('img', (('src', 'pic.png'), ('width', '100'))) u"" ''' if attrs: if tag == u'a': attrs = self.make_full_url(u'href', attrs) elif tag == u'img': attrs = self.make_full_url(u'src', attrs) elif tag == u'object': attrs = self.make_full_url(u'data', attrs) attrs = self.make_full_url(u'codebase', attrs) elif tag == u'script': attrs = self.make_full_url(u'src', attrs) return u'<%s %s%s>' % (tag, self.html_attrs(attrs), endtag) else: return u'<%s%s>' % (tag, endtag) def handle_starttag(self, tag, attrs): 
class _Parser(UTF8HTMLParser):
    '''
    Parse an HTML document and convert it to valid xhtml.

    The converted fragments are appended to ``self.output``.
    NOTE(review): ``self.output`` and ``self.html_attrs`` are not defined in
    this class; presumably they come from ``UTF8HTMLParser`` -- confirm.
    '''

    # Tags written in the self-closing form (``<br />``).
    _EMPTY_HTML_TAGS = ('area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
                        'img', 'input', 'isindex', 'link', 'meta', 'param')
    # Entities kept by name; other known ones become numeric references.
    _XML_ENTITIES = ('amp', 'gt', 'lt', 'quot')

    def handle_starttag(self, tag, attrs):
        '''Output an opening tag, self-closing it when the tag is empty.'''
        if tag in self._EMPTY_HTML_TAGS:
            self.handle_startendtag(tag, attrs)
        elif attrs:
            self.output.append(u'<%s %s>' % (tag, self.html_attrs(attrs)))
        else:
            self.output.append(u'<%s>' % tag)

    def handle_startendtag(self, tag, attrs):
        '''Output a self-closing tag.'''
        if attrs:
            self.output.append(u'<%s %s />' % (tag, self.html_attrs(attrs)))
        else:
            self.output.append(u'<%s />' % tag)

    def handle_entityref(self, name):
        '''
        Output the entity ``name``: by name if it is an XML entity, as a
        numeric character reference if it is another known HTML entity.
        Unknown entities are dropped and a warning is logged.
        '''
        if name in self._XML_ENTITIES:
            self.output.append(u'&%s;' % name)
        elif name in name2codepoint:
            self.output.append(u'&#%d;' % name2codepoint[name])
        else:
            logging.warning('Unknown XHTML entity: &%s;', name)

Hello
World

') u'

Hello
World

' >>> html_to_xhtml('Test & —') u'Test & —' >>> html_to_xhtml("
def slice_list(full_list, limit):
    '''
    Return an iterable containing the given list sliced in sub-lists of size
    `limit`. The last sub-list holds the remaining items and may be shorter.

    `full_list` can be any iterable. `limit` must be at least 1, otherwise
    ``ValueError`` is raised when the iteration starts (it used to produce an
    endless stream of empty lists).

    >>> list(slice_list(range(10), 3))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
    >>> list(slice_list([], 10))
    []
    >>> list(slice_list([1, 2, 3], 5))
    [[1, 2, 3]]
    >>> list(slice_list([1, 2, 3], 3))
    [[1, 2, 3]]
    '''
    from itertools import islice

    if limit < 1:
        raise ValueError('limit must be at least 1, got %r' % (limit,))
    it = iter(full_list)
    while True:
        # islice() consumes at most `limit` items from the shared iterator.
        chunk = list(islice(it, limit))
        if not chunk:
            # Input exhausted. A plain return ends the generator; raising
            # StopIteration here is an error on recent Python versions.
            return
        yield chunk
def generate_monthly_listing(output_dir, template, post_list,
                             template_params):
    '''
    Generate one page per month, each listing the posts dated in that month.

    Posts are grouped on the year and month of their ``date`` attribute. Each
    page is titled ``'YEAR-MONTH'`` (month not zero padded, e.g. '2007-6') and
    the pages are passed to ``generate_list`` in chronological order.
    '''
    # Group on a numeric (year, month) key: sorting the 'YEAR-MONTH' strings
    # would put '2007-10' before '2007-2'.
    posts_by_month = slice_list_groupby(post_list,
                                        lambda x: (x.date.year, x.date.month))
    pages = [Page(str(year) + '-' + str(month), posts_by_month[(year, month)])
             for year, month in sorted(posts_by_month.keys())]
    generate_list(output_dir, template, pages, template_params)
def load_post_list(path):
    '''
    Load all the files ending with '.html' or '.txt' in the directory
    ``path``; other files are ignored.

    Returns a ``set`` containing the ``Post`` objects created using the loaded
    files.

    Raises ``IOError`` if a loaded post is equal to a post which was already
    loaded from another file.
    '''
    post_list = set()
    for filename in os.listdir(path):
        if not (filename.endswith('.html') or filename.endswith('.txt')):
            logging.debug('Ignoring \'%s\'', filename)
            continue
        logging.debug('Loading \'%s\'', filename)
        p = Post(os.path.join(path, filename))
        if p in post_list:
            logging.debug('%r is duplicated', p)
            # Find the post already stored to report its filename.
            for duplicated_post in post_list:
                if duplicated_post == p:
                    break
            raise IOError('"%s", there is already a post '
                          'with this title and date ("%s")' %
                          (filename, duplicated_post.get_filename()))
        post_list.add(p)
    return post_list
and as well). Module usage: >>> import markdown2 >>> markdown2.markdown("*boo!*") # or use `html = markdown_path(PATH)` u'

boo!

\n' >>> markdowner = Markdown() >>> markdowner.convert("*boo!*") u'

boo!

\n' >>> markdowner.convert("**boom!**") u'

boom!

\n' This implementation of Markdown implements the full "core" syntax plus a number of extras (e.g., code syntax coloring, footnotes) as described on . """ cmdln_desc = """A fast and complete Python implementation of Markdown, a text-to-HTML conversion tool for web writers. """ # Dev Notes: # - There is already a Python markdown processor # (http://www.freewisdom.org/projects/python-markdown/). # - Python's regex syntax doesn't have '\z', so I'm using '\Z'. I'm # not yet sure if there implications with this. Compare 'pydoc sre' # and 'perldoc perlre'. __version_info__ = (1, 0, 1, 9, '+') # first three nums match Markdown.pl __version__ = '.'.join(map(str, __version_info__)) __author__ = "Trent Mick" import os import sys from pprint import pprint import re import logging try: from hashlib import md5 except ImportError: from md5 import md5 import optparse from random import random import codecs #---- Python version compat if sys.version_info[:2] < (2,4): from sets import Set as set def reversed(sequence): for i in sequence[::-1]: yield i def _unicode_decode(s, encoding, errors='xmlcharrefreplace'): return unicode(s, encoding, errors) else: def _unicode_decode(s, encoding, errors='strict'): return s.decode(encoding, errors) #---- globals DEBUG = False log = logging.getLogger("markdown") DEFAULT_TAB_WIDTH = 4 # Table of hash values for escaped characters: def _escape_hash(s): # Lame attempt to avoid possible collision with someone actually # using the MD5 hexdigest of one of these chars in there text. # Other ideas: random.random(), uuid.uuid() #return md5(s).hexdigest() # Markdown.pl effectively does this. 
def markdown_path(path, encoding="utf-8",
                  html4tags=False, tab_width=DEFAULT_TAB_WIDTH,
                  safe_mode=None, extras=None, link_patterns=None,
                  use_file_vars=False):
    """Convert the Markdown file at `path` and return the result.

    The file is decoded using `encoding`. All the other arguments are passed
    unchanged to the `Markdown` constructor.
    """
    fp = codecs.open(path, 'r', encoding)
    # Close the file even if reading or decoding fails.
    try:
        text = fp.read()
    finally:
        fp.close()
    return Markdown(html4tags=html4tags, tab_width=tab_width,
                    safe_mode=safe_mode, extras=extras,
                    link_patterns=link_patterns,
                    use_file_vars=use_file_vars).convert(text)
extras = None urls = None titles = None html_blocks = None html_spans = None html_removed_text = "[HTML_REMOVED]" # for compat with markdown.py # Used to track when we're inside an ordered or unordered list # (see _ProcessListItems() for details): list_level = 0 _ws_only_line_re = re.compile(r"^[ \t]+$", re.M) def __init__(self, html4tags=False, tab_width=4, safe_mode=None, extras=None, link_patterns=None, use_file_vars=False): if html4tags: self.empty_element_suffix = ">" else: self.empty_element_suffix = " />" self.tab_width = tab_width # For compatibility with earlier markdown2.py and with # markdown.py's safe_mode being a boolean, # safe_mode == True -> "replace" if safe_mode is True: self.safe_mode = "replace" else: self.safe_mode = safe_mode if self.extras is None: self.extras = {} elif not isinstance(self.extras, dict): self.extras = dict([(e, None) for e in self.extras]) if extras: if not isinstance(extras, dict): extras = dict([(e, None) for e in extras]) self.extras.update(extras) assert isinstance(self.extras, dict) self._instance_extras = self.extras.copy() self.link_patterns = link_patterns self.use_file_vars = use_file_vars self._outdent_re = re.compile(r'^(\t|[ ]{1,%d})' % tab_width, re.M) def reset(self): self.urls = {} self.titles = {} self.html_blocks = {} self.html_spans = {} self.list_level = 0 self.extras = self._instance_extras.copy() if "footnotes" in self.extras: self.footnotes = {} self.footnote_ids = [] def convert(self, text): """Convert the given text.""" # Main function. The order in which other subs are called here is # essential. Link and image substitutions need to happen before # _EscapeSpecialChars(), so that any *'s or _'s in the # and tags get encoded. # Clear the global hashes. If we don't clear these, you get conflicts # from other articles when generating a page which contains more than # one article (e.g. 
an index page that shows the N most recent # articles): self.reset() if not isinstance(text, unicode): #TODO: perhaps shouldn't presume UTF-8 for string input? text = unicode(text, 'utf-8') if self.use_file_vars: # Look for emacs-style file variable hints. emacs_vars = self._get_emacs_vars(text) if "markdown-extras" in emacs_vars: splitter = re.compile("[ ,]+") for e in splitter.split(emacs_vars["markdown-extras"]): if '=' in e: ename, earg = e.split('=', 1) try: earg = int(earg) except ValueError: pass else: ename, earg = e, None self.extras[ename] = earg # Standardize line endings: text = re.sub("\r\n|\r", "\n", text) # Make sure $text ends with a couple of newlines: text += "\n\n" # Convert all tabs to spaces. text = self._detab(text) # Strip any lines consisting only of spaces and tabs. # This makes subsequent regexen easier to write, because we can # match consecutive blank lines with /\n+/ instead of something # contorted like /[ \t]*\n+/ . text = self._ws_only_line_re.sub("", text) if self.safe_mode: text = self._hash_html_spans(text) # Turn block-level HTML blocks into hash entries text = self._hash_html_blocks(text, raw=True) # Strip link definitions, store in hashes. 
if "footnotes" in self.extras: # Must do footnotes first because an unlucky footnote defn # looks like a link defn: # [^4]: this "looks like a link defn" text = self._strip_footnote_definitions(text) text = self._strip_link_definitions(text) text = self._run_block_gamut(text) text = self._unescape_special_chars(text) if "footnotes" in self.extras: text = self._add_footnotes(text) if self.safe_mode: text = self._unhash_html_spans(text) text += "\n" return text _emacs_oneliner_vars_pat = re.compile(r"-\*-\s*([^\r\n]*?)\s*-\*-", re.UNICODE) # This regular expression is intended to match blocks like this: # PREFIX Local Variables: SUFFIX # PREFIX mode: Tcl SUFFIX # PREFIX End: SUFFIX # Some notes: # - "[ \t]" is used instead of "\s" to specifically exclude newlines # - "(\r\n|\n|\r)" is used instead of "$" because the sre engine does # not like anything other than Unix-style line terminators. _emacs_local_vars_pat = re.compile(r"""^ (?P(?:[^\r\n|\n|\r])*?) [\ \t]*Local\ Variables:[\ \t]* (?P.*?)(?:\r\n|\n|\r) (?P.*?\1End:) """, re.IGNORECASE | re.MULTILINE | re.DOTALL | re.VERBOSE) def _get_emacs_vars(self, text): """Return a dictionary of emacs-style local variables. Parsing is done loosely according to this spec (and according to some in-practice deviations from this): http://www.gnu.org/software/emacs/manual/html_node/emacs/Specifying-File-Variables.html#Specifying-File-Variables """ emacs_vars = {} SIZE = pow(2, 13) # 8kB # Search near the start for a '-*-'-style one-liner of variables. head = text[:SIZE] if "-*-" in head: match = self._emacs_oneliner_vars_pat.search(head) if match: emacs_vars_str = match.group(1) assert '\n' not in emacs_vars_str emacs_var_strs = [s.strip() for s in emacs_vars_str.split(';') if s.strip()] if len(emacs_var_strs) == 1 and ':' not in emacs_var_strs[0]: # While not in the spec, this form is allowed by emacs: # -*- Tcl -*- # where the implied "variable" is "mode". This form # is only allowed if there are no other variables. 
emacs_vars["mode"] = emacs_var_strs[0].strip() else: for emacs_var_str in emacs_var_strs: try: variable, value = emacs_var_str.strip().split(':', 1) except ValueError: log.debug("emacs variables error: malformed -*- " "line: %r", emacs_var_str) continue # Lowercase the variable name because Emacs allows "Mode" # or "mode" or "MoDe", etc. emacs_vars[variable.lower()] = value.strip() tail = text[-SIZE:] if "Local Variables" in tail: match = self._emacs_local_vars_pat.search(tail) if match: prefix = match.group("prefix") suffix = match.group("suffix") lines = match.group("content").splitlines(0) #print "prefix=%r, suffix=%r, content=%r, lines: %s"\ # % (prefix, suffix, match.group("content"), lines) # Validate the Local Variables block: proper prefix and suffix # usage. for i, line in enumerate(lines): if not line.startswith(prefix): log.debug("emacs variables error: line '%s' " "does not use proper prefix '%s'" % (line, prefix)) return {} # Don't validate suffix on last line. Emacs doesn't care, # neither should we. if i != len(lines)-1 and not line.endswith(suffix): log.debug("emacs variables error: line '%s' " "does not use proper suffix '%s'" % (line, suffix)) return {} # Parse out one emacs var per line. continued_for = None for line in lines[:-1]: # no var on the last line ("PREFIX End:") if prefix: line = line[len(prefix):] # strip prefix if suffix: line = line[:-len(suffix)] # strip suffix line = line.strip() if continued_for: variable = continued_for if line.endswith('\\'): line = line[:-1].rstrip() else: continued_for = None emacs_vars[variable] += ' ' + line else: try: variable, value = line.split(':', 1) except ValueError: log.debug("local variables error: missing colon " "in local variables entry: '%s'" % line) continue # Do NOT lowercase the variable name, because Emacs only # allows "mode" (and not "Mode", "MoDe", etc.) in this block. 
value = value.strip() if value.endswith('\\'): value = value[:-1].rstrip() continued_for = variable else: continued_for = None emacs_vars[variable] = value # Unquote values. for var, val in emacs_vars.items(): if len(val) > 1 and (val.startswith('"') and val.endswith('"') or val.startswith('"') and val.endswith('"')): emacs_vars[var] = val[1:-1] return emacs_vars # Cribbed from a post by Bart Lateur: # _detab_re = re.compile(r'(.*?)\t', re.M) def _detab_sub(self, match): g1 = match.group(1) return g1 + (' ' * (self.tab_width - len(g1) % self.tab_width)) def _detab(self, text): r"""Remove (leading?) tabs from a file. >>> m = Markdown() >>> m._detab("\tfoo") ' foo' >>> m._detab(" \tfoo") ' foo' >>> m._detab("\t foo") ' foo' >>> m._detab(" foo") ' foo' >>> m._detab(" foo\n\tbar\tblam") ' foo\n bar blam' """ if '\t' not in text: return text return self._detab_re.subn(self._detab_sub, text)[0] _block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del' _strict_tag_block_re = re.compile(r""" ( # save in \1 ^ # start of line (with re.M) <(%s) # start tag = \2 \b # word break (.*\n)*? # any number of lines, minimally matching # the matching end tag [ \t]* # trailing spaces/tabs (?=\n+|\Z) # followed by a newline or end of document ) """ % _block_tags_a, re.X | re.M) _block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math' _liberal_tag_block_re = re.compile(r""" ( # save in \1 ^ # start of line (with re.M) <(%s) # start tag = \2 \b # word break (.*\n)*? # any number of lines, minimally matching .* # the matching end tag [ \t]* # trailing spaces/tabs (?=\n+|\Z) # followed by a newline or end of document ) """ % _block_tags_b, re.X | re.M) # Save for usage in coming 'xml' extra. XXX_liberal_tag_block_re = re.compile(r""" ( # save in \1 ^ # start of line (with re.M) (?: <(%s|\w+:\w+) # start tag = \2 \b # word break (?:.*\n)*? 
# any number of lines, minimally matching .* # the matching end tag | <(\w+:)?\w+ # single tag-start \b # word break .*? # any content on one line, minimally matching /> # end of tag | <\?\w+ # start of processing instruction \b # word break .*? # any content on one line, minimally matching \?> # the PI end tag ) [ \t]* # trailing spaces/tabs (?=\n+|\Z) # followed by a newline or end of document ) """ % _block_tags_b, re.X | re.M) def _hash_html_block_sub(self, match, raw=False): html = match.group(1) if raw and self.safe_mode: html = self._sanitize_html(html) key = _hash_text(html) self.html_blocks[key] = html return "\n\n" + key + "\n\n" def _hash_html_blocks(self, text, raw=False): """Hashify HTML blocks We only want to do this for block-level HTML tags, such as headers, lists, and tables. That's because we still want to wrap

s around "paragraphs" that are wrapped in non-block-level tags, such as anchors, phrase emphasis, and spans. The list of tags we're looking for is hard-coded. @param raw {boolean} indicates if these are raw HTML blocks in the original source. It makes a difference in "safe" mode. """ if '<' not in text: return text # Pass `raw` value into our calls to self._hash_html_block_sub. hash_html_block_sub = _curry(self._hash_html_block_sub, raw=raw) # First, look for nested blocks, e.g.: #

#
# tags for inner block must be indented. #
#
# # The outermost tags must start at the left margin for this to match, and # the inner nested divs must be indented. # We need to do this before the next, more liberal match, because the next # match will start at the first `
` and stop at the first `
`. text = self._strict_tag_block_re.sub(hash_html_block_sub, text) # Now match more liberally, simply from `\n` to `\n` text = self._liberal_tag_block_re.sub(hash_html_block_sub, text) # Special case just for
. It was easier to make a special # case than to make the other regex more complicated. if "", start_idx) + 3 except ValueError, ex: break # Start position for next comment block search. start = end_idx # Validate whitespace before comment. if start_idx: # - Up to `tab_width - 1` spaces before start_idx. for i in range(self.tab_width - 1): if text[start_idx - 1] != ' ': break start_idx -= 1 if start_idx == 0: break # - Must be preceded by 2 newlines or hit the start of # the document. if start_idx == 0: pass elif start_idx == 1 and text[0] == '\n': start_idx = 0 # to match minute detail of Markdown.pl regex elif text[start_idx-2:start_idx] == '\n\n': pass else: break # Validate whitespace after comment. # - Any number of spaces and tabs. while end_idx < len(text): if text[end_idx] not in ' \t': break end_idx += 1 # - Must be following by 2 newlines or hit end of text. if text[end_idx:end_idx+2] not in ('', '\n', '\n\n'): continue # Escape and hash (must match `_hash_html_block_sub`). html = text[start_idx:end_idx] if raw and self.safe_mode: html = self._sanitize_html(html) key = _hash_text(html) self.html_blocks[key] = html text = text[:start_idx] + "\n\n" + key + "\n\n" + text[end_idx:] return text def _strip_link_definitions(self, text): # Strips link definitions from text, stores the URLs and titles in # hash references. less_than_tab = self.tab_width - 1 # Link defs are in the form: # [id]: url "optional title" _link_def_re = re.compile(r""" ^[ ]{0,%d}\[(.+)\]: # id = \1 [ \t]* \n? # maybe *one* newline [ \t]* ? # url = \2 [ \t]* (?: \n? # maybe one newline [ \t]* (?<=\s) # lookbehind for whitespace ['"(] ([^\n]*) # title = \3 ['")] [ \t]* )? 
# title is optional (?:\n+|\Z) """ % less_than_tab, re.X | re.M | re.U) return _link_def_re.sub(self._extract_link_def_sub, text) def _extract_link_def_sub(self, match): id, url, title = match.groups() key = id.lower() # Link IDs are case-insensitive self.urls[key] = self._encode_amps_and_angles(url) if title: self.titles[key] = title.replace('"', '"') return "" def _extract_footnote_def_sub(self, match): id, text = match.groups() text = _dedent(text, skip_first_line=not text.startswith('\n')).strip() normed_id = re.sub(r'\W', '-', id) # Ensure footnote text ends with a couple newlines (for some # block gamut matches). self.footnotes[normed_id] = text + "\n\n" return "" def _strip_footnote_definitions(self, text): """A footnote definition looks like this: [^note-id]: Text of the note. May include one or more indented paragraphs. Where, - The 'note-id' can be pretty much anything, though typically it is the number of the footnote. - The first paragraph may start on the next line, like so: [^note-id]: Text of the note. """ less_than_tab = self.tab_width - 1 footnote_def_re = re.compile(r''' ^[ ]{0,%d}\[\^(.+)\]: # id = \1 [ \t]* ( # footnote text = \2 # First line need not start with the spaces. (?:\s*.*\n+) (?: (?:[ ]{%d} | \t) # Subsequent lines must be indented. .*\n+ )* ) # Lookahead for non-space at line-start, or end of doc. (?:(?=^[ ]{0,%d}\S)|\Z) ''' % (less_than_tab, self.tab_width, self.tab_width), re.X | re.M) return footnote_def_re.sub(self._extract_footnote_def_sub, text) _hr_res = [ re.compile(r"^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$", re.M), re.compile(r"^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$", re.M), re.compile(r"^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$", re.M), ] def _run_block_gamut(self, text): # These are all the transformations that form block-level # tags like paragraphs, headers, and list items. text = self._do_headers(text) # Do Horizontal Rules: hr = "\n tags around block-level tags. 
text = self._hash_html_blocks(text) text = self._form_paragraphs(text) return text def _pyshell_block_sub(self, match): lines = match.group(0).splitlines(0) _dedentlines(lines) indent = ' ' * self.tab_width s = ('\n' # separate from possible cuddled paragraph + indent + ('\n'+indent).join(lines) + '\n\n') return s def _prepare_pyshell_blocks(self, text): """Ensure that Python interactive shell sessions are put in code blocks -- even if not properly indented. """ if ">>>" not in text: return text less_than_tab = self.tab_width - 1 _pyshell_block_re = re.compile(r""" ^([ ]{0,%d})>>>[ ].*\n # first line ^(\1.*\S+.*\n)* # any number of subsequent lines ^\n # ends with a blank line """ % less_than_tab, re.M | re.X) return _pyshell_block_re.sub(self._pyshell_block_sub, text) def _run_span_gamut(self, text): # These are all the transformations that occur *within* block-level # tags like paragraphs, headers, and list items. text = self._do_code_spans(text) text = self._escape_special_chars(text) # Process anchor and image tags. text = self._do_links(text) # Make links out of things like `` # Must come after _do_links(), because you can use < and > # delimiters in inline links like [this](). text = self._do_auto_links(text) if "link-patterns" in self.extras: text = self._do_link_patterns(text) text = self._encode_amps_and_angles(text) text = self._do_italics_and_bold(text) # Do hard breaks: text = re.sub(r" {2,}\n", " | # auto-link (e.g., ) <\w+[^>]*> | # comment | <\?.*?\?> # processing instruction ) """, re.X) def _escape_special_chars(self, text): # Python markdown note: the HTML tokenization here differs from # that in Markdown.pl, hence the behaviour for subtle cases can # differ (I believe the tokenizer here does a better job because # it isn't susceptible to unmatched '<' and '>' in HTML tags). # Note, however, that '>' is not allowed in an auto-link URL # here. 
escaped = [] is_html_markup = False for token in self._sorta_html_tokenize_re.split(text): if is_html_markup: # Within tags/HTML-comments/auto-links, encode * and _ # so they don't conflict with their use in Markdown for # italics and strong. We're replacing each such # character with its corresponding MD5 checksum value; # this is likely overkill, but it should prevent us from # colliding with the escape values by accident. escaped.append(token.replace('*', g_escape_table['*']) .replace('_', g_escape_table['_'])) else: escaped.append(self._encode_backslash_escapes(token)) is_html_markup = not is_html_markup return ''.join(escaped) def _hash_html_spans(self, text): # Used for safe_mode. def _is_auto_link(s): if ':' in s and self._auto_link_re.match(s): return True elif '@' in s and self._auto_email_link_re.match(s): return True return False tokens = [] is_html_markup = False for token in self._sorta_html_tokenize_re.split(text): if is_html_markup and not _is_auto_link(token): sanitized = self._sanitize_html(token) key = _hash_text(sanitized) self.html_spans[key] = sanitized tokens.append(key) else: tokens.append(token) is_html_markup = not is_html_markup return ''.join(tokens) def _unhash_html_spans(self, text): for key, sanitized in self.html_spans.items(): text = text.replace(key, sanitized) return text def _sanitize_html(self, s): if self.safe_mode == "replace": return self.html_removed_text elif self.safe_mode == "escape": replacements = [ ('&', '&'), ('<', '<'), ('>', '>'), ] for before, after in replacements: s = s.replace(before, after) return s else: raise MarkdownError("invalid value for 'safe_mode': %r (must be " "'escape' or 'replace')" % self.safe_mode) _tail_of_inline_link_re = re.compile(r''' # Match tail of: [text](/url/) or [text](/url/ "title") \( # literal paren [ \t]* (?P # \1 <.*?> | .*? ) [ \t]* ( # \2 (['"]) # quote char = \3 (?P.*?) \3 # matching quote )? 
# title is optional \) ''', re.X | re.S) _tail_of_reference_link_re = re.compile(r''' # Match tail of: [text][id] [ ]? # one optional space (?:\n[ ]*)? # one optional newline followed by spaces \[ (?P<id>.*?) \] ''', re.X | re.S) def _do_links(self, text): """Turn Markdown link shortcuts into XHTML <a> and <img> tags. This is a combination of Markdown.pl's _DoAnchors() and _DoImages(). They are done together because that simplified the approach. It was necessary to use a different approach than Markdown.pl because of the lack of atomic matching support in Python's regex engine used in $g_nested_brackets. """ MAX_LINK_TEXT_SENTINEL = 300 # `anchor_allowed_pos` is used to support img links inside # anchors, but not anchors inside anchors. An anchor's start # pos must be `>= anchor_allowed_pos`. anchor_allowed_pos = 0 curr_pos = 0 while True: # Handle the next link. # The next '[' is the start of: # - an inline anchor: [text](url "title") # - a reference anchor: [text][id] # - an inline img: ![text](url "title") # - a reference img: ![text][id] # - a footnote ref: [^id] # (Only if 'footnotes' extra enabled) # - a footnote defn: [^id]: ... # (Only if 'footnotes' extra enabled) These have already # been stripped in _strip_footnote_definitions() so no # need to watch for them. # - a link definition: [id]: url "title" # These have already been stripped in # _strip_link_definitions() so no need to watch for them. # - not markup: [...anything else... try: start_idx = text.index('[', curr_pos) except ValueError: break text_length = len(text) # Find the matching closing ']'. # Markdown.pl allows *matching* brackets in link text so we # will here too. Markdown.pl *doesn't* currently allow # matching brackets in img alt text -- we'll differ in that # regard. 
bracket_depth = 0 for p in range(start_idx+1, min(start_idx+MAX_LINK_TEXT_SENTINEL, text_length)): ch = text[p] if ch == ']': bracket_depth -= 1 if bracket_depth < 0: break elif ch == '[': bracket_depth += 1 else: # Closing bracket not found within sentinel length. # This isn't markup. curr_pos = start_idx + 1 continue link_text = text[start_idx+1:p] # Possibly a footnote ref? if "footnotes" in self.extras and link_text.startswith("^"): normed_id = re.sub(r'\W', '-', link_text[1:]) if normed_id in self.footnotes: self.footnote_ids.append(normed_id) result = '<sup class="footnote-ref" id="fnref-%s">' \ '<a href="#fn-%s">%s</a></sup>' \ % (normed_id, normed_id, len(self.footnote_ids)) text = text[:start_idx] + result + text[p+1:] else: # This id isn't defined, leave the markup alone. curr_pos = p+1 continue # Now determine what this is by the remainder. p += 1 if p == text_length: return text # Inline anchor or img? if text[p] == '(': # attempt at perf improvement match = self._tail_of_inline_link_re.match(text, p) if match: # Handle an inline anchor or img. is_img = start_idx > 0 and text[start_idx-1] == "!" if is_img: start_idx -= 1 url, title = match.group("url"), match.group("title") if url and url[0] == '<': url = url[1:-1] # '<url>' -> 'url' # We've got to encode these to avoid conflicting # with italics/bold. url = url.replace('*', g_escape_table['*']) \ .replace('_', g_escape_table['_']) if title: title_str = ' title="%s"' \ % title.replace('*', g_escape_table['*']) \ .replace('_', g_escape_table['_']) \ .replace('"', '"') else: title_str = '' if is_img: result = '<img src="%s" alt="%s"%s%s' \ % (url, link_text.replace('"', '"'), title_str, self.empty_element_suffix) curr_pos = start_idx + len(result) text = text[:start_idx] + result + text[match.end():] elif start_idx >= anchor_allowed_pos: result_head = '<a href="%s"%s>' % (url, title_str) result = '%s%s</a>' % (result_head, link_text) # <img> allowed from curr_pos on, <a> from # anchor_allowed_pos on. 
curr_pos = start_idx + len(result_head) anchor_allowed_pos = start_idx + len(result) text = text[:start_idx] + result + text[match.end():] else: # Anchor not allowed here. curr_pos = start_idx + 1 continue # Reference anchor or img? else: match = self._tail_of_reference_link_re.match(text, p) if match: # Handle a reference-style anchor or img. is_img = start_idx > 0 and text[start_idx-1] == "!" if is_img: start_idx -= 1 link_id = match.group("id").lower() if not link_id: link_id = link_text.lower() # for links like [this][] if link_id in self.urls: url = self.urls[link_id] # We've got to encode these to avoid conflicting # with italics/bold. url = url.replace('*', g_escape_table['*']) \ .replace('_', g_escape_table['_']) title = self.titles.get(link_id) if title: title = title.replace('*', g_escape_table['*']) \ .replace('_', g_escape_table['_']) title_str = ' title="%s"' % title else: title_str = '' if is_img: result = '<img src="%s" alt="%s"%s%s' \ % (url, link_text.replace('"', '"'), title_str, self.empty_element_suffix) curr_pos = start_idx + len(result) text = text[:start_idx] + result + text[match.end():] elif start_idx >= anchor_allowed_pos: result = '<a href="%s"%s>%s</a>' \ % (url, title_str, link_text) result_head = '<a href="%s"%s>' % (url, title_str) result = '%s%s</a>' % (result_head, link_text) # <img> allowed from curr_pos on, <a> from # anchor_allowed_pos on. curr_pos = start_idx + len(result_head) anchor_allowed_pos = start_idx + len(result) text = text[:start_idx] + result + text[match.end():] else: # Anchor not allowed here. curr_pos = start_idx + 1 else: # This id isn't defined, leave the markup alone. curr_pos = match.end() continue # Otherwise, it isn't markup. 
curr_pos = start_idx + 1 return text _setext_h_re = re.compile(r'^(.+)[ \t]*\n(=+|-+)[ \t]*\n+', re.M) def _setext_h_sub(self, match): n = {"=": 1, "-": 2}[match.group(2)[0]] demote_headers = self.extras.get("demote-headers") if demote_headers: n = min(n + demote_headers, 6) return "<h%d>%s</h%d>\n\n" \ % (n, self._run_span_gamut(match.group(1)), n) _atx_h_re = re.compile(r''' ^(\#{1,6}) # \1 = string of #'s [ \t]* (.+?) # \2 = Header text [ \t]* (?<!\\) # ensure not an escaped trailing '#' \#* # optional closing #'s (not counted) \n+ ''', re.X | re.M) def _atx_h_sub(self, match): n = len(match.group(1)) demote_headers = self.extras.get("demote-headers") if demote_headers: n = min(n + demote_headers, 6) return "<h%d>%s</h%d>\n\n" \ % (n, self._run_span_gamut(match.group(2)), n) def _do_headers(self, text): # Setext-style headers: # Header 1 # ======== # # Header 2 # -------- text = self._setext_h_re.sub(self._setext_h_sub, text) # atx-style headers: # # Header 1 # ## Header 2 # ## Header 2 with closing hashes ## # ... # ###### Header 6 text = self._atx_h_re.sub(self._atx_h_sub, text) return text _marker_ul_chars = '*+-' _marker_any = r'(?:[%s]|\d+\.)' % _marker_ul_chars _marker_ul = '(?:[%s])' % _marker_ul_chars _marker_ol = r'(?:\d+\.)' def _list_sub(self, match): lst = match.group(1) lst_type = match.group(3) in self._marker_ul_chars and "ul" or "ol" result = self._process_list_items(lst) if self.list_level: return "<%s>\n%s</%s>\n" % (lst_type, result, lst_type) else: return "<%s>\n%s</%s>\n\n" % (lst_type, result, lst_type) def _do_lists(self, text): # Form HTML ordered (numbered) and unordered (bulleted) lists. for marker_pat in (self._marker_ul, self._marker_ol): # Re-usable pattern to match any entire ul or ol list: less_than_tab = self.tab_width - 1 whole_list = r''' ( # \1 = whole list ( # \2 [ ]{0,%d} (%s) # \3 = first list item marker [ \t]+ ) (?:.+?) ( # \4 \Z | \n{2,} (?=\S) (?! 
# Negative lookahead for another list item marker [ \t]* %s[ \t]+ ) ) ) ''' % (less_than_tab, marker_pat, marker_pat) # We use a different prefix before nested lists than top-level lists. # See extended comment in _process_list_items(). # # Note: There's a bit of duplication here. My original implementation # created a scalar regex pattern as the conditional result of the test on # $g_list_level, and then only ran the $text =~ s{...}{...}egmx # substitution once, using the scalar as the pattern. This worked, # everywhere except when running under MT on my hosting account at Pair # Networks. There, this caused all rebuilds to be killed by the reaper (or # perhaps they crashed, but that seems incredibly unlikely given that the # same script on the same server ran fine *except* under MT. I've spent # more time trying to figure out why this is happening than I'd like to # admit. My only guess, backed up by the fact that this workaround works, # is that Perl optimizes the substition when it can figure out that the # pattern will never change, and when this optimization isn't on, we run # afoul of the reaper. Thus, the slightly redundant code to that uses two # static s/// patterns rather than one conditional pattern. if self.list_level: sub_list_re = re.compile("^"+whole_list, re.X | re.M | re.S) text = sub_list_re.sub(self._list_sub, text) else: list_re = re.compile(r"(?:(?<=\n\n)|\A\n?)"+whole_list, re.X | re.M | re.S) text = list_re.sub(self._list_sub, text) return text _list_item_re = re.compile(r''' (\n)? # leading line = \1 (^[ \t]*) # leading whitespace = \2 (%s) [ \t]+ # list marker = \3 ((?:.+?) 
# list item text = \4 (\n{1,2})) # eols = \5 (?= \n* (\Z | \2 (%s) [ \t]+)) ''' % (_marker_any, _marker_any), re.M | re.X | re.S) _last_li_endswith_two_eols = False def _list_item_sub(self, match): item = match.group(4) leading_line = match.group(1) leading_space = match.group(2) if leading_line or "\n\n" in item or self._last_li_endswith_two_eols: item = self._run_block_gamut(self._outdent(item)) else: # Recursion for sub-lists: item = self._do_lists(self._outdent(item)) if item.endswith('\n'): item = item[:-1] item = self._run_span_gamut(item) self._last_li_endswith_two_eols = (len(match.group(5)) == 2) return "<li>%s</li>\n" % item def _process_list_items(self, list_str): # Process the contents of a single ordered or unordered list, # splitting it into individual list items. # The $g_list_level global keeps track of when we're inside a list. # Each time we enter a list, we increment it; when we leave a list, # we decrement. If it's zero, we're not in a list anymore. # # We do this because when we're not inside a list, we want to treat # something like this: # # I recommend upgrading to version # 8. Oops, now this line is treated # as a sub-list. # # As a single paragraph, despite the fact that the second line starts # with a digit-period-space sequence. # # Whereas when we're inside a list (or sub-list), that line will be # treated as the start of a sub-list. What a kludge, huh? This is # an aspect of Markdown's syntax that's hard to parse perfectly # without resorting to mind-reading. Perhaps the solution is to # change the syntax rules such that sub-lists must start with a # starting cardinal number; e.g. "1." or "a.". 
self.list_level += 1 self._last_li_endswith_two_eols = False list_str = list_str.rstrip('\n') + '\n' list_str = self._list_item_re.sub(self._list_item_sub, list_str) self.list_level -= 1 return list_str def _get_pygments_lexer(self, lexer_name): try: from pygments import lexers, util except ImportError: return None try: return lexers.get_lexer_by_name(lexer_name) except util.ClassNotFound: return None def _color_with_pygments(self, codeblock, lexer): import pygments import pygments.formatters class HtmlCodeFormatter(pygments.formatters.HtmlFormatter): def _wrap_code(self, inner): """A function for use in a Pygments Formatter which wraps in <code> tags. """ yield 0, "<code>" for tup in inner: yield tup yield 0, "</code>" def wrap(self, source, outfile): """Return the source with a code, pre, and div.""" return self._wrap_div(self._wrap_pre(self._wrap_code(source))) formatter = HtmlCodeFormatter(cssclass="codehilite") return pygments.highlight(codeblock, lexer, formatter) def _code_block_sub(self, match): codeblock = match.group(1) codeblock = self._outdent(codeblock) codeblock = self._detab(codeblock) codeblock = codeblock.lstrip('\n') # trim leading newlines codeblock = codeblock.rstrip() # trim trailing whitespace if "code-color" in self.extras and codeblock.startswith(":::"): lexer_name, rest = codeblock.split('\n', 1) lexer_name = lexer_name[3:].strip() lexer = self._get_pygments_lexer(lexer_name) codeblock = rest.lstrip("\n") # Remove lexer declaration line. 
if lexer: colored = self._color_with_pygments(codeblock, lexer) return "\n\n%s\n\n" % colored codeblock = self._encode_code(codeblock) return "\n\n<pre><code>%s\n</code></pre>\n\n" % codeblock def _do_code_blocks(self, text): """Process Markdown `<pre><code>` blocks.""" code_block_re = re.compile(r''' (?:\n\n|\A) ( # $1 = the code block -- one or more lines, starting with a space/tab (?: (?:[ ]{%d} | \t) # Lines must start with a tab or a tab-width of spaces .*\n+ )+ ) ((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space at line-start, or end of doc ''' % (self.tab_width, self.tab_width), re.M | re.X) return code_block_re.sub(self._code_block_sub, text) # Rules for a code span: # - backslash escapes are not interpreted in a code span # - to include one or or a run of more backticks the delimiters must # be a longer run of backticks # - cannot start or end a code span with a backtick; pad with a # space and that space will be removed in the emitted HTML # See `test/tm-cases/escapes.text` for a number of edge-case # examples. _code_span_re = re.compile(r''' (?<!\\) (`+) # \1 = Opening run of ` (?!`) # See Note A test/tm-cases/escapes.text (.+?) # \2 = The code block (?<!`) \1 # Matching closer (?!`) ''', re.X | re.S) def _code_span_sub(self, match): c = match.group(2).strip(" \t") c = self._encode_code(c) return "<code>%s</code>" % c def _do_code_spans(self, text): # * Backtick quotes are used for <code></code> spans. # # * You can use multiple backticks as the delimiters if you want to # include literal backticks in the code span. So, this input: # # Just type ``foo `bar` baz`` at the prompt. # # Will translate to: # # <p>Just type <code>foo `bar` baz</code> at the prompt.</p> # # There's no arbitrary limit to the number of backticks you # can use as delimters. If you need three consecutive backticks # in your code, use four for delimiters, etc. # # * You can use spaces to get literal backticks at the edges: # # ... type `` `bar` `` ... # # Turns to: # # ... 
type <code>`bar`</code> ... return self._code_span_re.sub(self._code_span_sub, text) def _encode_code(self, text): """Encode/escape certain characters inside Markdown code runs. The point is that in code, these characters are literals, and lose their special Markdown meanings. """ replacements = [ # Encode all ampersands; HTML entities are not # entities within a Markdown code span. ('&', '&'), # Do the angle bracket song and dance: ('<', '<'), ('>', '>'), # Now, escape characters that are magic in Markdown: ('*', g_escape_table['*']), ('_', g_escape_table['_']), ('{', g_escape_table['{']), ('}', g_escape_table['}']), ('[', g_escape_table['[']), (']', g_escape_table[']']), ('\\', g_escape_table['\\']), ] for before, after in replacements: text = text.replace(before, after) return text _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S) _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S) _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S) _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S) def _do_italics_and_bold(self, text): # <strong> must go first: if "code-friendly" in self.extras: text = self._code_friendly_strong_re.sub(r"<strong>\1</strong>", text) text = self._code_friendly_em_re.sub(r"<em>\1</em>", text) else: text = self._strong_re.sub(r"<strong>\2</strong>", text) text = self._em_re.sub(r"<em>\2</em>", text) return text _block_quote_re = re.compile(r''' ( # Wrap whole match in \1 ( ^[ \t]*>[ \t]? 
# '>' at the start of a line .+\n # rest of the first line (.+\n)* # subsequent consecutive lines \n* # blanks )+ ) ''', re.M | re.X) _bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M); _html_pre_block_re = re.compile(r'(\s*<pre>.+?</pre>)', re.S) def _dedent_two_spaces_sub(self, match): return re.sub(r'(?m)^ ', '', match.group(1)) def _block_quote_sub(self, match): bq = match.group(1) bq = self._bq_one_level_re.sub('', bq) # trim one level of quoting bq = self._ws_only_line_re.sub('', bq) # trim whitespace-only lines bq = self._run_block_gamut(bq) # recurse bq = re.sub('(?m)^', ' ', bq) # These leading spaces screw with <pre> content, so we need to fix that: bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq) return "<blockquote>\n%s\n</blockquote>\n\n" % bq def _do_block_quotes(self, text): if '>' not in text: return text return self._block_quote_re.sub(self._block_quote_sub, text) def _form_paragraphs(self, text): # Strip leading and trailing lines: text = text.strip('\n') # Wrap <p> tags. grafs = re.split(r"\n{2,}", text) for i, graf in enumerate(grafs): if graf in self.html_blocks: # Unhashify HTML blocks grafs[i] = self.html_blocks[graf] else: # Wrap <p> tags. 
graf = self._run_span_gamut(graf) grafs[i] = "<p>" + graf.lstrip(" \t") + "</p>" return "\n\n".join(grafs) def _add_footnotes(self, text): if self.footnotes: footer = [ '<div class="footnotes">', '<hr' + self.empty_element_suffix, '<ol>', ] for i, id in enumerate(self.footnote_ids): if i != 0: footer.append('') footer.append('<li id="fn-%s">' % id) footer.append(self._run_block_gamut(self.footnotes[id])) backlink = ('<a href="#fnref-%s" ' 'class="footnoteBackLink" ' 'title="Jump back to footnote %d in the text.">' '↩</a>' % (id, i+1)) if footer[-1].endswith("</p>"): footer[-1] = footer[-1][:-len("</p>")] \ + ' ' + backlink + "</p>" else: footer.append("\n<p>%s</p>" % backlink) footer.append('</li>') footer.append('</ol>') footer.append('</div>') return text + '\n\n' + '\n'.join(footer) else: return text # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: # http://bumppo.net/projects/amputator/ _ampersand_re = re.compile(r'&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)') _naked_lt_re = re.compile(r'<(?![a-z/?\$!])', re.I) def _encode_amps_and_angles(self, text): # Smart processing for ampersands and angle brackets that need # to be encoded. text = self._ampersand_re.sub('&', text) # Encode naked <'s text = self._naked_lt_re.sub('<', text) return text def _encode_backslash_escapes(self, text): for ch, escape in g_escape_table.items(): text = text.replace("\\"+ch, escape) return text _auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I) def _auto_link_sub(self, match): g1 = match.group(1) return '<a href="%s">%s</a>' % (g1, g1) _auto_email_link_re = re.compile(r""" < (?:mailto:)? 
( [-.\w]+ \@ [-\w]+(\.[-\w]+)*\.[a-zA-Z]+ ) > """, re.I | re.X | re.U) def _auto_email_link_sub(self, match): return self._encode_email_address( self._unescape_special_chars(match.group(1))) def _do_auto_links(self, text): text = self._auto_link_re.sub(self._auto_link_sub, text) text = self._auto_email_link_re.sub(self._auto_email_link_sub, text) return text def _encode_email_address(self, addr): # Input: an email address, e.g. "foo@example.com" # # Output: the email address as a mailto link, with each character # of the address encoded as either a decimal or hex entity, in # the hopes of foiling most address harvesting spam bots. E.g.: # # <a href="mailto:foo@e # xample.com">foo # @example.com</a> # # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk # mailing list: <http://tinyurl.com/yu7ue> chars = [_xml_encode_email_char_at_random(ch) for ch in "mailto:" + addr] # Strip the mailto: from the visible part. addr = '<a href="%s">%s</a>' \ % (''.join(chars), ''.join(chars[7:])) return addr def _do_link_patterns(self, text): """Caveat emptor: there isn't much guarding against link patterns being formed inside other standard Markdown links, e.g. inside a [link def][like this]. Dev Notes: *Could* consider prefixing regexes with a negative lookbehind assertion to attempt to guard against this. 
""" link_from_hash = {} for regex, href in self.link_patterns: replacements = [] for match in regex.finditer(text): replacements.append((match.span(), match.expand(href))) for (start, end), href in reversed(replacements): escaped_href = ( href.replace('"', '"') # b/c of attr quote # To avoid markdown <em> and <strong>: .replace('*', g_escape_table['*']) .replace('_', g_escape_table['_'])) link = '<a href="%s">%s</a>' % (escaped_href, text[start:end]) hash = md5(link).hexdigest() link_from_hash[hash] = link text = text[:start] + hash + text[end:] for hash, link in link_from_hash.items(): text = text.replace(hash, link) return text def _unescape_special_chars(self, text): # Swap back in all the special characters we've hidden. for ch, hash in g_escape_table.items(): text = text.replace(hash, ch) return text def _outdent(self, text): # Remove one level of line-leading tabs or spaces return self._outdent_re.sub('', text) class MarkdownWithExtras(Markdown): """A markdowner class that enables most extras: - footnotes - code-color (only has effect if 'pygments' Python module on path) These are not included: - pyshell (specific to Python-related documenting) - code-friendly (because it *disables* part of the syntax) - link-patterns (because you need to specify some actual link-patterns anyway) """ extras = ["footnotes", "code-color"] #---- internal support functions # From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549 def _curry(*args, **kwargs): function, args = args[0], args[1:] def result(*rest, **kwrest): combined = kwargs.copy() combined.update(kwrest) return function(*args + rest, **combined) return result # Recipe: regex_from_encoded_pattern (1.0) def _regex_from_encoded_pattern(s): """'foo' -> re.compile(re.escape('foo')) '/foo/' -> re.compile('foo') '/foo/i' -> re.compile('foo', re.I) """ if s.startswith('/') and s.rfind('/') != 0: # Parse it: /PATTERN/FLAGS idx = s.rfind('/') pattern, flags_str = s[1:idx], s[idx+1:] flag_from_char = { "i": 
re.IGNORECASE, "l": re.LOCALE, "s": re.DOTALL, "m": re.MULTILINE, "u": re.UNICODE, } flags = 0 for char in flags_str: try: flags |= flag_from_char[char] except KeyError: raise ValueError("unsupported regex flag: '%s' in '%s' " "(must be one of '%s')" % (char, s, ''.join(flag_from_char.keys()))) return re.compile(s[1:idx], flags) else: # not an encoded regex return re.compile(re.escape(s)) # Recipe: dedent (0.1.2) def _dedentlines(lines, tabsize=8, skip_first_line=False): """_dedentlines(lines, tabsize=8, skip_first_line=False) -> dedented lines "lines" is a list of lines to dedent. "tabsize" is the tab width to use for indent width calculations. "skip_first_line" is a boolean indicating if the first line should be skipped for calculating the indent width and for dedenting. This is sometimes useful for docstrings and similar. Same as dedent() except operates on a sequence of lines. Note: the lines list is modified **in-place**. """ DEBUG = False if DEBUG: print "dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\ % (tabsize, skip_first_line) indents = [] margin = None for i, line in enumerate(lines): if i == 0 and skip_first_line: continue indent = 0 for ch in line: if ch == ' ': indent += 1 elif ch == '\t': indent += tabsize - (indent % tabsize) elif ch in '\r\n': continue # skip all-whitespace lines else: break else: continue # skip all-whitespace lines if DEBUG: print "dedent: indent=%d: %r" % (indent, line) if margin is None: margin = indent else: margin = min(margin, indent) if DEBUG: print "dedent: margin=%r" % margin if margin is not None and margin > 0: for i, line in enumerate(lines): if i == 0 and skip_first_line: continue removed = 0 for j, ch in enumerate(line): if ch == ' ': removed += 1 elif ch == '\t': removed += tabsize - (removed % tabsize) elif ch in '\r\n': if DEBUG: print "dedent: %r: EOL -> strip up to EOL" % line lines[i] = lines[i][j:] break else: raise ValueError("unexpected non-whitespace char %r in " "line %r while removing %d-space 
margin" % (ch, line, margin)) if DEBUG: print "dedent: %r: %r -> removed %d/%d"\ % (line, ch, removed, margin) if removed == margin: lines[i] = lines[i][j+1:] break elif removed > margin: lines[i] = ' '*(removed-margin) + lines[i][j+1:] break else: if removed: lines[i] = lines[i][removed:] return lines def _dedent(text, tabsize=8, skip_first_line=False): """_dedent(text, tabsize=8, skip_first_line=False) -> dedented text "text" is the text to dedent. "tabsize" is the tab width to use for indent width calculations. "skip_first_line" is a boolean indicating if the first line should be skipped for calculating the indent width and for dedenting. This is sometimes useful for docstrings and similar. textwrap.dedent(s), but don't expand tabs to spaces """ lines = text.splitlines(1) _dedentlines(lines, tabsize=tabsize, skip_first_line=skip_first_line) return ''.join(lines) class _memoized(object): """Decorator that caches a function's return value each time it is called. If called later with the same arguments, the cached value is returned, and not re-evaluated. http://wiki.python.org/moin/PythonDecoratorLibrary """ def __init__(self, func): self.func = func self.cache = {} def __call__(self, *args): try: return self.cache[args] except KeyError: self.cache[args] = value = self.func(*args) return value except TypeError: # uncachable -- for instance, passing a list as an argument. # Better to not cache than to blow up entirely. return self.func(*args) def __repr__(self): """Return the function's docstring.""" return self.func.__doc__ def _hr_tag_re_from_tab_width(tab_width): return re.compile(r""" (?: (?<=\n\n) # Starting after a blank line | # or \A\n? # the beginning of the doc ) ( # save in \1 [ ]{0,%d} <(hr) # start tag = \2 \b # word break ([^<>])*? 
# /?> # the matching end tag [ \t]* (?=\n{2,}|\Z) # followed by a blank line or end of document ) """ % (tab_width - 1), re.X) _hr_tag_re_from_tab_width = _memoized(_hr_tag_re_from_tab_width) def _xml_encode_email_char_at_random(ch): r = random() # Roughly 10% raw, 45% hex, 45% dec. # '@' *must* be encoded. I [John Gruber] insist. if r > 0.9 and ch != "@": return ch elif r < 0.45: # The [1:] is to drop leading '0': 0x63 -> x63 return '&#%s;' % hex(ord(ch))[1:] else: return '&#%s;' % ord(ch) def _hash_text(text): return 'md5:'+md5(text.encode("utf-8")).hexdigest() #---- mainline class _NoReflowFormatter(optparse.IndentedHelpFormatter): """An optparse formatter that does NOT reflow the description.""" def format_description(self, description): return description or "" def _test(): import doctest doctest.testmod() def main(argv=sys.argv): if not logging.root.handlers: logging.basicConfig() usage = "usage: %prog [PATHS...]" version = "%prog "+__version__ parser = optparse.OptionParser(prog="markdown2", usage=usage, version=version, description=cmdln_desc, formatter=_NoReflowFormatter()) parser.add_option("-v", "--verbose", dest="log_level", action="store_const", const=logging.DEBUG, help="more verbose output") parser.add_option("--encoding", help="specify encoding of text content") parser.add_option("--html4tags", action="store_true", default=False, help="use HTML 4 style for empty element tags") parser.add_option("-s", "--safe", metavar="MODE", dest="safe_mode", help="sanitize literal HTML: 'escape' escapes " "HTML meta chars, 'replace' replaces with an " "[HTML_REMOVED] note") parser.add_option("-x", "--extras", action="append", help="Turn on specific extra features (not part of " "the core Markdown spec). 
Supported values: " "'code-friendly' disables _/__ for emphasis; " "'code-color' adds code-block syntax coloring; " "'link-patterns' adds auto-linking based on patterns; " "'footnotes' adds the footnotes syntax;" "'pyshell' to put unindented Python interactive shell sessions in a <code> block.") parser.add_option("--use-file-vars", help="Look for and use Emacs-style 'markdown-extras' " "file var to turn on extras. See " "<http://code.google.com/p/python-markdown2/wiki/Extras>.") parser.add_option("--link-patterns-file", help="path to a link pattern file") parser.add_option("--self-test", action="store_true", help="run internal self-tests (some doctests)") parser.add_option("--compare", action="store_true", help="run against Markdown.pl as well (for testing)") parser.set_defaults(log_level=logging.INFO, compare=False, encoding="utf-8", safe_mode=None, use_file_vars=False) opts, paths = parser.parse_args() log.setLevel(opts.log_level) if opts.self_test: return _test() if opts.extras: extras = {} for s in opts.extras: splitter = re.compile("[,;: ]+") for e in splitter.split(s): if '=' in e: ename, earg = e.split('=', 1) try: earg = int(earg) except ValueError: pass else: ename, earg = e, None extras[ename] = earg else: extras = None if opts.link_patterns_file: link_patterns = [] f = open(opts.link_patterns_file) try: for i, line in enumerate(f.readlines()): if not line.strip(): continue if line.lstrip().startswith("#"): continue try: pat, href = line.rstrip().rsplit(None, 1) except ValueError: raise MarkdownError("%s:%d: invalid link pattern line: %r" % (opts.link_patterns_file, i+1, line)) link_patterns.append( (_regex_from_encoded_pattern(pat), href)) finally: f.close() else: link_patterns = None from os.path import join, dirname, abspath markdown_pl = join(dirname(dirname(abspath(__file__))), "test", "Markdown.pl") for path in paths: if opts.compare: print "==== Markdown.pl ====" perl_cmd = 'perl %s "%s"' % (markdown_pl, path) o = os.popen(perl_cmd) perl_html = 
o.read() o.close() sys.stdout.write(perl_html) print "==== markdown2.py ====" html = markdown_path(path, encoding=opts.encoding, html4tags=opts.html4tags, safe_mode=opts.safe_mode, extras=extras, link_patterns=link_patterns, use_file_vars=opts.use_file_vars) sys.stdout.write( html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace')) if opts.compare: print "==== match? %r ====" % (perl_html == html) if __name__ == "__main__": sys.exit( main(sys.argv) ) ������������������������������������������������������������������������������������������������������������weblog-1.1/weblog/post.py���������������������������������������������������������������������������0000644�0000000�0000000�00000021054�11063022120�015577� 0����������������������������������������������������������������������������������������������������ustar�00user����������������������������group���������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import re import email import codecs import logging from os import stat import datetime from urllib import quote try: from markdown2 import markdown except ImportError: def markdown(text, *args): logging.warning('Markdown syntax not available.' 
'Please install markdown2.') # Ugly but better than nothing :) return '<pre>%s</pre>' % text from html_to_xhtml import html_to_xhtml class PostError(Exception): ''' Error in post file ''' def __init__(self, filename, message): Exception.__init__(self, '%s: %s' % (filename, message)) class Author(unicode): _AUTHOR_REGEX = re.compile(u'(?P<name>.+[^\s])\s*<(?P<email>[^@]+@.+)>', re.UNICODE) def name(self): r = self._AUTHOR_REGEX.match(self) if r: return r.group('name') else: return self def email(self): r = self._AUTHOR_REGEX.match(self) if r: return r.group('email') else: return '' class Post(object): DEFAULT_ENCODING = u'ascii' DEFAULT_AUTHOR = u'unknown author' def __init__(self, f, markup=None): if isinstance(f, basestring): self._filename = f input_file = open(f) else: self._filename = None input_file = f post_file = email.message_from_file(input_file) # First get the file's encoding self.encoding = unicode(post_file.get('encoding') or self.DEFAULT_ENCODING) try: codecs.lookup(self.encoding) except LookupError, e: raise PostError(self.get_filename(), str(e)) # Copy all field "into the object" and convert string to unicode. try: for key, value in post_file.items(): self.__dict__[key.encode('ascii').lower()] = \ unicode(value, self.encoding) except UnicodeDecodeError, e: raise PostError(self.get_filename(), "for key '%s': %s" % (key, e)) if not hasattr(self, 'author'): self.author = Author(self.DEFAULT_AUTHOR) else: self.author = Author(self.author) # Handle the date. If no date was specified use the file's modification # time. if not hasattr(self, 'date'): if not self._filename: self.date = None else: # Get the date from file's mtime and issue a warning mtime = stat(self._filename).st_mtime self.date = datetime.datetime.fromtimestamp(mtime) logging.warning("No date defined in '%s', using the file's " "last modification time instead." 
% \ self._filename) else: try: self.date = self.parse_date(self.date) except ValueError, e: raise PostError(self.get_filename(), str(e)) try: self.ascii_title = self.title.encode('ascii', 'replace') except UnicodeDecodeError, e: raise PostError(self.get_filename(), 'Bad encoding in title') if not post_file.get_payload(): raise PostError(self.get_filename(), 'does not have content') try: self.content = unicode(post_file.get_payload(), self.encoding) except UnicodeDecodeError, e: # find error line number for line_number, line in enumerate(post_file.as_string().\ splitlines()): try: line.decode('ascii' if self.encoding == 'raw' else self.encoding) except UnicodeDecodeError, e: break # line_number starts at 0, real line number == line_number + 1 raise PostError(self.get_filename(), 'Bad encoding in content line %d, %s' % \ (line_number + 1, e)) if not markup: # Determine type via file extension if self._filename and self._filename.endswith('.txt'): self._markup = 'markdown' elif self._filename and self._filename.endswith('.html'): self._markup = 'html' elif not self._filename: self._markup = 'html' else: logging.warning("Unable to determine '%s' type, falling " "back to HTML" % self._filename) self._markup = 'html' else: assert markup in ('markdown', 'html') self._markup = markup # Transform the 'files' field into a list of string if hasattr(self, 'files'): self.files = self.files.split() else: self.files = list() # FIXME prefix & suffix param or members of the class ? def url(self, prefix=''): ''' >>> file_content = """title: test ... date: 2008-1-1 ... ... test""" >>> from StringIO import StringIO >>> Post(StringIO(file_content)).url() '2008/1/1/test.html' >>> Post(StringIO(file_content)).url('prefix/') 'prefix/2008/1/1/test.html' >>> file_content = """title: Weird @!% filename ... date: 2008-1-1 ... ... 
test""" >>> Post(StringIO(file_content)).url() '2008/1/1/Weird%20%40%21%25%20filename.html' ''' return '%s%d/%d/%d/%s.html' % \ (prefix, self.date.year, self.date.month, self.date.day, quote(self.ascii_title)) _DATE_FORMAT_LIST = ('%Y-%m-%d', '%y-%m-%d') _DATETIME_FORMAT_LIST = \ tuple('%s %%H:%%M' % f for f in _DATE_FORMAT_LIST) + \ tuple('%s %%H:%%M:%%S' % f for f in _DATE_FORMAT_LIST) @staticmethod def parse_date(date_): """ >>> Post.parse_date('2006-1-1') datetime.date(2006, 1, 1) >>> Post.parse_date('2007-12-31') datetime.date(2007, 12, 31) >>> Post.parse_date('2008-4-05 12:35') datetime.datetime(2008, 4, 5, 12, 35) >>> Post.parse_date('10000-1-1') Traceback (most recent call last): ... ValueError: Unable to parse date '10000-1-1' (Use YYYY-MM-DD [[HH:MM]:SS] format) >>> Post.parse_date(2007) Traceback (most recent call last): ... TypeError: strptime() argument 1 must be string, not int """ for date_format in Post._DATE_FORMAT_LIST: try: return datetime.datetime.strptime(date_, date_format).date() except ValueError: continue for date_format in Post._DATETIME_FORMAT_LIST: try: return datetime.datetime.strptime(date_, date_format) except ValueError: continue raise ValueError('Unable to parse date \'%s\'\n' '(Use YYYY-MM-DD [[HH:MM]:SS] format)' % (date_)) def get_filename(self): if not self._filename: return '<unknown filename>' else: return self._filename def get_html(self): if self._markup == 'markdown': return markdown(self.content, html4tags=True) elif self._markup == 'html': return self.content else: assert False, "Unknown type for %r" % self def get_xhtml(self): if self._markup == 'markdown': return markdown(self.content, html4tags=False) elif self._markup == 'html': return html_to_xhtml(self.content) else: assert False, "Unknown type for %r" % self def __cmp__(self, other): ''' >>> file1 = "title: 1\\ndate: 2008-1-1\\n\\ntest" >>> file2 = "title: 2\\ndate: 2007-12-31\\n\\ntest" >>> from StringIO import StringIO >>> Post(StringIO(file1)) > 
Post(StringIO(file2)) True >>> Post(StringIO(file1)) == Post(StringIO(file2)) False >>> Post(StringIO(file1)) == Post(StringIO(file1)) True >>> l = [Post(StringIO(file2)), Post(StringIO(file1))] >>> l.index(Post(StringIO(file1))) 1 ''' return cmp(unicode(self.date) + self.title, unicode(other.date) + other.title) def __hash__(self): return hash(str(self.date) + self.title) def __repr__(self): return '<%s(%r, %r)>' % (self.__class__.__name__, self.title, self.date) if __name__ == '__main__': import doctest doctest.testmod() ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������weblog-1.1/weblog/publish.py������������������������������������������������������������������������0000644�0000000�0000000�00000012100�11063022120�016250� 0����������������������������������������������������������������������������������������������������ustar�00user����������������������������group���������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import os import datetime import logging import codecs from shutil import copy import weblog from _jinja_environment import jinja_environment from html_full_url import html_full_url from post import Post, PostError from load import load_post_list, load_configuration from listing import generate_index_listing def generate_post_html(post_list, output_dir, post_tmpl, params): for post in post_list: logging.debug('Generating HTML file for %r', post) dir = os.path.join(output_dir, 
str(post.date.year), str(post.date.month), str(post.date.day)) if not os.path.exists(dir): logging.debug('Creating \'%s\'', dir) os.makedirs(dir) elif not os.path.isdir(dir): raise IOError('\'%s\' already exists and is not a directory' % dir) top_dir = '../../../' r = post_tmpl.render(title=post.title, date=post.date, author=post.author, content=html_full_url(top_dir, post.get_html()), top_dir=top_dir, **dict(((k, v) for k, v in params.iteritems() if k != 'title' and k != 'content'))) open(os.path.join(dir, post.ascii_title + '.html'), 'w').write(r) def command_publish(args, options): source_dir = options.source_dir output_dir = options.output_dir try: config = load_configuration(options.configuration_file, source_dir or '.') except (KeyError, ValueError, IOError), error: logging.error('Error while loading configuration file \'%s\'' % options.configuration_file) raise SystemExit(error) source_dir = source_dir or config.get('source_dir', '.') output_dir = output_dir or config.get('output_dir', 'output') # add the default author & encoding constant to the post class if 'encoding' in config: Post.DEFAULT_ENCODING = config['encoding'] author = config.get('author', None) if author: Post.DEFAULT_AUTHOR = author if not os.path.exists(output_dir): os.mkdir(output_dir) env = jinja_environment(source_dir) try: post_list = list(reversed(sorted(load_post_list(source_dir)))) except (IOError, PostError), e: logging.error('Error while loading post files.') raise SystemExit(e) def generate_all(): params = dict(title=config['title'], description=config.get('description'), url=config['url'], html_head=config.get('html_head'), html_header=config.get('html_header'), html_footer=config.get('html_footer')) # generate the main index page logging.debug('Generating HTML listings') index_template = env.get_template('index.html.tmpl') generate_index_listing(config['post_per_page'], output_dir, index_template, post_list, params) logging.debug('Generating HTML posts files') post_tmpl = 
env.get_template('post.html.tmpl') generate_post_html(post_list, output_dir, post_tmpl, params) # Copy all 'attached' files for post in post_list: for filename in post.files: destination = os.path.join(output_dir, filename) # Create the destination directory if it does not exist destination_dir = os.path.dirname(destination) # isdir returns False if the passed file does not exist if not os.path.isdir(destination_dir): os.makedirs(destination_dir) copy(os.path.join(source_dir, filename), destination) # Generate Atom feed template = env.get_template('feed.atom.tmpl') # Last time the feed was updated posts = post_list[:config['feed_limit']] if posts: feed_updated = max(p.date for p in posts) else: feed_updated = datetime.datetime.utcnow() atom_file = codecs.open(os.path.join(output_dir, 'feed.atom'), 'w', encoding='utf8') atom_file.write(template.render(posts=posts, feed_updated=feed_updated, weblog_version=weblog.__version__, **params)) atom_file.close() for f in config.get('extra_files', '').split(): copy(os.path.join(source_dir, f), output_dir) if options.debug: generate_all() else: try: generate_all() except IOError, e: logging.error('Error while generating files ...') raise SystemExit(e) else: logging.info('Successfully generated weblog.') ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������weblog-1.1/weblog/rfc3339.py������������������������������������������������������������������������0000644�0000000�0000000�00000016007�11063022120�015710� 
0����������������������������������������������������������������������������������������������������ustar�00user����������������������������group���������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env python ''' The function `rfc3339` formats dates according to the :RFC:`3339`. `rfc3339` tries to have as much as possible sensible defaults. ''' __author__ = 'Henry Precheur <henry@precheur.org>' __license__ = 'Public Domain' __version__ = '1' __all__ = ('rfc3339', ) import datetime import time import unittest def _timezone(utcoffset): ''' Return a string representing the timezone offset. >>> _timezone(0) '+00:00' >>> _timezone(3600) '+01:00' >>> _timezone(-28800) '-08:00' ''' # Python's division uses floor(), not round() like in other languages. # >>> -1 / 2 # -1 hours = int(float(utcoffset)) // 3600 minutes = abs(utcoffset) % 3600 // 60 return '%+03d:%02d' % (hours, minutes) def _timedelta_to_seconds(timedelta): ''' >>> _timedelta_to_seconds(datetime.timedelta(hours=3)) 10800 >>> _timedelta_to_seconds(datetime.timedelta(hours=3, minutes=15)) 11700 ''' return (timedelta.days * 86400 + timedelta.seconds + timedelta.microseconds // 1000) def _utc_offset(date, use_system_timezone): ''' Return the UTC offset of `date`. If `date` does not have any `tzinfo`, use the timezone informations stored locally on the system. >>> if time.daylight: ... system_timezone = -time.altzone ... else: ... 
system_timezone = -time.timezone >>> _utc_offset(datetime.datetime.now(), True) == system_timezone True >>> _utc_offset(datetime.datetime.now(), False) 0 ''' if isinstance(date, datetime.datetime) and date.tzinfo is not None: return _timedelta_to_seconds(date.dst() or date.utcoffset()) elif use_system_timezone: if time.daylight: return -time.altzone else: return -time.timezone else: return 0 def _utc_string(d): return d.strftime('%Y-%m-%dT%H:%M:%SZ') def rfc3339(date, utc=False, use_system_timezone=True): ''' Return a string formatted according to the :RFC:`3339`. If called with `utc=True`, it normalizes `date` to the UTC date. If `date` does not have any timezone information, uses the local timezone:: >>> date = datetime.datetime(2008, 4, 2, 20) >>> rfc3339(date, utc=True, use_system_timezone=False) '2008-04-02T20:00:00Z' >>> rfc3339(date) # doctest: +ELLIPSIS '2008-04-02T20:00:00...' If called with `user_system_time=False` don't use the local timezone if `date` does not have timezone informations and consider the offset to UTC to be zero:: >>> rfc3339(date, use_system_timezone=False) '2008-04-02T20:00:00+00:00' `date` must be a `datetime.datetime`, `datetime.date` or a timestamp as returned by `time.time()`:: >>> rfc3339(0, utc=True, use_system_timezone=False) '1970-01-01T00:00:00Z' >>> rfc3339(datetime.date(2008, 9, 6), utc=True, ... use_system_timezone=False) '2008-09-06T00:00:00Z' >>> rfc3339(datetime.date(2008, 9, 6), ... use_system_timezone=False) '2008-09-06T00:00:00+00:00' >>> rfc3339('foo bar') Traceback (most recent call last): ... TypeError: excepted datetime, got str instead ''' # Check if `date` is a timestamp. 
try: if utc: return _utc_string(datetime.datetime.utcfromtimestamp(date)) else: date = datetime.datetime.fromtimestamp(date) except TypeError: pass if isinstance(date, datetime.date): utcoffset = _utc_offset(date, use_system_timezone) if utc: if not isinstance(date, datetime.datetime): date = datetime.datetime(*date.timetuple()[:3]) return _utc_string(date + datetime.timedelta(seconds=utcoffset)) else: return date.strftime('%Y-%m-%dT%H:%M:%S') + _timezone(utcoffset) else: raise TypeError('excepted %s, got %s instead' % (datetime.datetime.__name__, date.__class__.__name__)) class LocalTimeTestCase(unittest.TestCase): ''' Test the use of the timezone saved locally. Since it is hard to test using doctest. ''' def setUp(self): local_utcoffset = _utc_offset(datetime.datetime.now(), True) self.local_utcoffset = datetime.timedelta(seconds=local_utcoffset) self.local_timezone = _timezone(local_utcoffset) def test_datetime(self): d = datetime.datetime.now() self.assertEqual(rfc3339(d), d.strftime('%Y-%m-%dT%H:%M:%S') + self.local_timezone) def test_datetime_timezone(self): class FixedNoDst(datetime.tzinfo): 'A timezone info with fixed offset, not DST' def utcoffset(self, dt): return datetime.timedelta(hours=2, minutes=30) def dst(self, dt): return None fixed_no_dst = FixedNoDst() class Fixed(FixedNoDst): 'A timezone info with DST' def dst(self, dt): return datetime.timedelta(hours=3, minutes=15) fixed = Fixed() d = datetime.datetime.now().replace(tzinfo=fixed_no_dst) timezone = _timezone(_timedelta_to_seconds(fixed_no_dst.\ utcoffset(None))) self.assertEqual(rfc3339(d), d.strftime('%Y-%m-%dT%H:%M:%S') + timezone) d = datetime.datetime.now().replace(tzinfo=fixed) timezone = _timezone(_timedelta_to_seconds(fixed.dst(None))) self.assertEqual(rfc3339(d), d.strftime('%Y-%m-%dT%H:%M:%S') + timezone) def test_datetime_utc(self): d = datetime.datetime.now() d_utc = d + self.local_utcoffset self.assertEqual(rfc3339(d, utc=True), d_utc.strftime('%Y-%m-%dT%H:%M:%SZ')) def 
test_date(self): d = datetime.date.today() self.assertEqual(rfc3339(d), d.strftime('%Y-%m-%dT%H:%M:%S') + self.local_timezone) def test_date_utc(self): d = datetime.date.today() # Convert `date` to `datetime`, since `date` ignores seconds and hours # in timedeltas: # >>> datetime.date(2008, 9, 7) + datetime.timedelta(hours=23) # datetime.date(2008, 9, 7) d_utc = datetime.datetime(*d.timetuple()[:3]) + self.local_utcoffset self.assertEqual(rfc3339(d, utc=True), d_utc.strftime('%Y-%m-%dT%H:%M:%SZ')) def test_timestamp(self): d = time.time() self.assertEqual(rfc3339(d), datetime.datetime.fromtimestamp(d).\ strftime('%Y-%m-%dT%H:%M:%S') + self.local_timezone) def test_timestamp_utc(self): d = time.time() d_utc = datetime.datetime.utcfromtimestamp(d) + self.local_utcoffset self.assertEqual(rfc3339(d), (d_utc.strftime('%Y-%m-%dT%H:%M:%S') + self.local_timezone)) if __name__ == '__main__': import doctest doctest.testmod() unittest.main() �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������weblog-1.1/weblog/templates/base.html.tmpl����������������������������������������������������������0000644�0000000�0000000�00000001633�11063022120�021012� 0����������������������������������������������������������������������������������������������������ustar�00user����������������������������group���������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������<!DOCTYPE HTML PUBLIC "-//W3C//DTD 
HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> <html> <head> <title>{% block title %}{{ title|decode|escape }}{% endblock %} {% block feed %} {% endblock %} {{ html_head|decode|renderstring }} {% block extrahead %} {% endblock %} {% block header %} {{ html_header|decode|renderstring }} {% endblock %}
{% block content %}{% endblock %}
{% block footer %} {% if html_footer %} {{ html_footer|decode|renderstring }} {% else %}

Published using Weblog

{% endif %} {% endblock %} {# vim:set ft=htmljinja: #} weblog-1.1/weblog/templates/feed.atom.tmpl0000644000000000000000000000212011063022120020767 0ustar00usergroup00000000000000 {{ url|escape }} {{ title|escape }} {{ description|escape }} {{ feed_updated|rfc3339 }} {% if author %} {{ author.name()|escape }} {{ author.email() }} {{ url|escape }} {% endif %} Weblog {% for post in posts %} {{ post.url(url) }} {{ post.title|escape }} {{ post.date|rfc3339 }} {{ post.author.name()|escape }} {{ post.author.email()|escape }} {{ url|escape }}
{{ post.get_xhtml() }}
{% endfor %}
{# vim: set filetype=jinja ts=4 sw=4 et: #} weblog-1.1/weblog/templates/index.html.tmpl0000644000000000000000000000225311063022120021206 0ustar00usergroup00000000000000{% extends 'base.html.tmpl' %} {% block content %}

{{ title|decode|escape }}

{% if description %}

{{ description|decode|escape }}

{% endif %} {% for post in post_list %}

{{ post.title|decode|escape }}

{{ post.date|format_date }}, by {{ post.author.name()|decode }} <{{ post.author.email()|urlize }}>

{{ post.get_html()|decode }}
{% endfor %} {% if pages|length > 1 %}
{% if pages.index(page) > 0 %} « prev {% endif %} {% for p in pages %} {% if p == page %} {{ p.title }} {% else %} {{ p.title }} {% endif %} {% endfor %} {% if pages.index(page) + 1 < pages|length %} next » {% endif %}
{% endif %} {% endblock %} {# vim:set ft=htmljinja: #} weblog-1.1/weblog/templates/post.html.tmpl0000644000000000000000000000074711063022120021072 0ustar00usergroup00000000000000{% extends 'base.html.tmpl' %} {% block content %}

{{ title|decode|escape }}

{{ date|format_date }}, by {{ author.name()|decode }} <{{ author.email()|urlize }}>

{{ content|decode }}
{% endblock %} {# vim:set ft=htmljinja: #} weblog-1.1/weblog/utf8_html_parser.py0000644000000000000000000000437011063022120020102 0ustar00usergroup00000000000000from cgi import escape from HTMLParser import HTMLParser class UTF8HTMLParser(HTMLParser): ''' Parse a HTML document and convert all nodes to UTF-8:: >>> parser = UTF8HTMLParser() >>> parser.feed("

Hello world

") >>> parser.get_value() u"

Hello world

" >>> parser.feed('

Another sentence.

') >>> parser.get_value() u"

Hello world

Another sentence.

" `reset()` resets the parser:: >>> parser.reset() >>> parser.get_value() u'' ''' def __init__(self): HTMLParser.__init__(self) self.output = list() def reset(self): HTMLParser.reset(self) self.output = list() def get_value(self): return u''.join(self.output) @staticmethod def html_attrs(attrs): ''' >>> UTF8HTMLParser.html_attrs((('src', 'pic.jpg'), ('alt', 'pic'))) u"src='pic.jpg' alt='pic'" >>> UTF8HTMLParser.html_attrs(list()) u'' >>> UTF8HTMLParser.html_attrs((('href', 'sample?foo=1&bar=2'),)) u"href='sample?foo=1&bar=2'" ''' # HTMLParser unescape attributes values, we don't want that. return u' '.join(u'%s=\'%s\'' % (k, escape(v)) for k, v in attrs) def handle_starttag(self, tag, attrs): if attrs: self.output.append(u'<%s %s>' % (tag, self.html_attrs(attrs))) else: self.output.append(u'<%s>' % tag) def handle_startendtag(self, tag, attrs): self.handle_starttag(tag, attrs) def handle_endtag(self, tag): self.output.append(u'' % tag) def handle_data(self, data): self.output.append(data) def handle_charref(self, name): self.output.append(u'&#%s;' % name) def handle_entityref(self, name): self.output.append(u'&%s;' % name) def handle_comment(self, comment): self.output.append(u'' % comment) def handle_decl(self, decl): self.output.append(u'' % decl) def handle_pi(self, pi): self.output.append(u'' % pi) if __name__ == '__main__': import doctest doctest.testmod() weblog-1.1/weblog/utils.py0000644000000000000000000000307511063022120015755 0ustar00usergroup00000000000000import os import logging import datetime from cgi import escape def load_if_filename(source_dir, f): ''' If ``f`` is a filename. Read it and returns the content. Else return ``f``. If ``bool(f)`` is false returns ``None``. 
# Assumes that there is no file named 'This is not a file' in the current # directory ;-) >>> load_if_filename('.', 'This is not a file') 'This is not a file' >>> load_if_filename('.', '') >>> load_if_filename('.', list()) >>> load_if_filename('.', None) ''' if not f: return full = os.path.join(source_dir, f) if os.path.exists(full): return file(full).read() else: return f def format_date(date): ''' Return a string representing a ``date`` or a ``datetime``. >>> format_date(datetime.datetime(2008, 1, 1, 20, 40, 23, 345)) '2008-01-01 20:40:23' >>> format_date(datetime.datetime(2008, 1, 1)) '2008-01-01 00:00:00' >>> format_date(datetime.date(2008, 1, 1)) '2008-01-01' >>> format_date(datetime.time()) Traceback (most recent call last): ... TypeError: expected date or datetime, got time instead ''' if isinstance(date, datetime.datetime): return date.strftime('%Y-%m-%d %H:%M:%S') elif isinstance(date, datetime.date): return str(date) else: raise TypeError('expected %s or %s, got %s instead' % (datetime.date.__name__, datetime.datetime.__name__, date.__class__.__name__)) if __name__ == '__main__': import doctest doctest.testmod() weblog-1.1/weblog_run.py0000755000000000000000000000037511063022120015504 0ustar00usergroup00000000000000#!/usr/bin/env python import imp from os import path filename = path.join(path.dirname(__file__), 'bin', 'weblog') module = imp.load_module('weblog_executable', file(filename), filename, ('', 'r', imp.PY_SOURCE)) module.main()