Rcjp's Weblog

March 30, 2007

Old Blog Code

Filed under: python — rcjp @ 1:10 pm

Just in case there is some useful code in it, this is what I used to generate my old blog before I gave up and moved to WordPress. I had a nested directory of text files containing keywords that this Python script expanded into HTML tags. It even used to spawn GIMP to generate heading images.

import os, sys, re
# Module-level configuration shared by the functions below.
# NOTE: a `global` statement at module scope has no effect -- `bloghome` is
# already a module-level name.  The script's `__main__` section reassigns
# `bloghome` depending on whether the --upload option was given.
global bloghome
bloghome = 'http://www.codephrenic.co.uk/blog/'   # where the blog is destined for (see below)
blogbase = '/home/r/blog'                         # where the files get generated to

# html tag translations
#
# Maps a tag name used in an entry's text to the %-format template that
# func_expand() fills in with the text found between the square brackets.
# e.g. link["www.physics.org", "the physics website"]
#      bigpic[{'src':'/mypic.jpg', 'title':'blah', 'width':100, 'height':100}]
#
# Templates with a single %s accept plain text, 'link' and 'pic' need a
# two-item argument list, and 'bigpic' needs a dict with the keys shown above.
tags = {
    'title'     : '<div class="header"><h2>%s</h2></div>',
    'entry'     : '<div class="entry">%s</div>',
    'date'      : '<div class="date">%s</div>',
    'list'      : '<ul>%s</ul>',
    'item'      : '<li>%s</li>',
    'link'      : '<a href="%s">%s</a>',
    'pic'       : '<img src="%s" alt="%s" width="50" height="50"/>',
    # alt is mandatory on <img> under the XHTML 1.0 Strict doctype used by
    # blogheader; this tag only takes a src, so an empty alt is emitted.
    'picright'  : '<img class="floatright" src="%s" alt="" width="50" height="50"/>',
    'bigpic'    : '<img src="%(src)s" alt="%(title)s" width="%(width)d" height="%(height)d"/>',
    'italic'    : '<i>%s</i>',
    'tt'        : '<tt>%s</tt>',
    'bold'      : '<b>%s</b>',
    }

# Fixed text written at the top of every generated page: the XHTML 1.0 Strict
# doctype, the <head> (which pulls in blog.css and titles.css via relative
# hrefs) and the opening <body> / "blogpage" <div> tags.
blogheader = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
    <head>
        <title>rcjp blog</title>
        <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
        <link rel="stylesheet" type="text/css" href="blog.css" />
        <link rel="stylesheet" type="text/css" href="titles.css" />
    </head>
    <body><div id="blogpage">
"""

# Closes the <div>, <body> and <html> elements opened by blogheader.
blogfooter = '</div></body></html>'

def make_heading_image(title, category):
    """Generate the heading image for one blog page by running GIMP in batch mode.

    GIMP is asked to evaluate ``(blogheading TITLE 100 "Trebuchet MS" FILE)``
    followed by ``(gimp-quit 0)``.  The ``blogheading`` Script-Fu procedure is
    not defined in this file and must already be available to GIMP.

    title    -- text passed to ``blogheading`` as its first argument
    category -- base name of the image; the output file passed to
                ``blogheading`` is ``<blogbase>/images/<category>.png``

    GIMP is started directly from an argument list rather than through a
    shell command string, so quotes in ``title`` or ``category`` cannot break
    the command line.  A missing ``gimp`` executable or a non-zero exit
    status is reported on stderr and otherwise ignored, so page generation
    carries on without the image.
    """
    import subprocess   # only needed here, so imported locally

    print('processing image %s' % category)

    def scheme_string(value):
        # Quote a value as a Script-Fu string literal; backslash and double
        # quote are escaped so they cannot terminate the literal early.
        return '"%s"' % value.replace('\\', '\\\\').replace('"', '\\"')

    imagefile = blogbase + "/images/" + category + ".png"
    script = '(blogheading %s 100 "Trebuchet MS" %s)' % (
        scheme_string(title), scheme_string(imagefile))
    argv = ['gimp', '--batch-interpreter', 'plug_in_script_fu_eval', '-i',
            '-b', script, '-b', '(gimp-quit 0)']
    try:
        gimp = subprocess.Popen(argv, stdin=subprocess.PIPE)
    except OSError:
        sys.stderr.write('***WARNING: could not run gimp, no image made for "%s"\n' % category)
        return
    # communicate() closes gimp's stdin and waits for it to finish, which is
    # what closing the os.popen(cmd, 'w') pipe used to do.
    gimp.communicate()
    if gimp.returncode != 0:
        sys.stderr.write('***WARNING: gimp exited with status %s for "%s"\n'
                         % (gimp.returncode, category))

def func_expand(match):
    """Return the html for one ``name[body]`` match (re.sub callback).

    match -- regex match whose group 1 is the tag name and group 2 the text
             between the square brackets

    The tag name selects a template from ``tags``.  A body starting with a
    double quote or ``{`` is evaluated as a Python expression (giving the
    string, tuple or dict the template needs); any other body is substituted
    as plain text.

    Exits the program (SystemExit) with an error message when the tag name is
    unknown or when the body does not fit the tag's template.
    """
    fname, body = match.group(1, 2)

    # Look the tag up on its own: a KeyError raised later by a dict-style
    # template such as 'bigpic' means a bad argument, not an unknown tag.
    try:
        template = tags[fname]
    except KeyError:
        sys.exit('***ERROR: Unknown tag "%s"' % fname)

    try:
        if body.startswith('"') or body.startswith('{'):
            # NOTE(review): eval() runs whatever Python expression the entry
            # text contains -- only acceptable while entries are trusted.
            args = eval(body)
        else:
            args = body
        return template % args
    except (TypeError, KeyError, SyntaxError, NameError):
        # SyntaxError/NameError come from eval() on a malformed body.
        sys.exit('***ERROR: Tag "%s" has been supplied wrong arguments "%s"' % (fname, body))

def expand_string(str):
    """Expand every ``func[...]`` tag embedded in the text into html.

    The pattern only matches a tag whose body contains no further ``[``, so
    each pass replaces the innermost tags; passes are repeated until one of
    them makes no replacement.  Each match is handed to func_expand().

    (The parameter name shadows the builtin ``str``; it is left unchanged so
    the signature stays the same.)
    """
    tag_call = re.compile(r'(\w+)\s*?\[([^\[]*?)\]')
    while True:
        str, replaced = tag_call.subn(func_expand, str)
        if not replaced > 0:
            break
    return str

def escape_string(text):
    """Convert every ``code[...]`` segment of text into an escaped <code> element.

    code[] needs separate treatment because the regexp approach in
    expand_string would find [] inside code segments and try to expand them.
    Inside a segment:

      * ``<``, ``>`` and ``&`` become ``&lt;``, ``&gt;`` and ``&amp;``
      * nested ``[`` and ``]`` become ``&#91;`` and ``&#93;``, which display
        as brackets but can no longer be matched as tag syntax

    The segment ends at the ``]`` that balances the opening ``code[``.  Text
    outside code[] segments is returned unchanged.

    Exits the program (SystemExit) if a ``code[`` has no matching ``]``.
    """
    searchstr = 'code['
    entities = {'<': '&lt;', '>': '&gt;', '&': '&amp;',
                '[': '&#91;', ']': '&#93;'}
    pieces = []
    done = 0                    # everything before this index is already in pieces
    pos = text.find(searchstr)
    while pos >= 0:
        pieces.append(text[done:pos])
        pieces.append('<code>')
        bcount = 1              # number of ] still needed to close the segment
        pos += len(searchstr)
        while pos < len(text):  # find matching ] bracket
            char = text[pos]
            if char == '[':
                bcount += 1
            elif char == ']':
                bcount -= 1
                if bcount == 0:
                    break
            pieces.append(entities.get(char, char))
            pos += 1
        else:
            # ran off the end of the text without balancing the brackets
            sys.exit('Could not find matching bracket')
        pieces.append('</code>')
        done = pos + 1
        # carry on after this segment so finished output is never rescanned
        pos = text.find(searchstr, done)
    pieces.append(text[done:])
    return ''.join(pieces)

def paragraphs(lines, is_separator=str.isspace, joiner=''.join):
    """Yield the paragraphs found in a sequence of lines.

    lines        -- iterable of lines
    is_separator -- called on each line; a true result marks the line as a
                    paragraph break (default: whitespace-only lines)
    joiner       -- called on the list of lines collected for one paragraph
                    to build the value that is yielded

    Separator lines are dropped, and consecutive separators never produce an
    empty paragraph.
    """
    pending = []
    for current in lines:
        if not is_separator(current):
            pending.append(current)
            continue
        # a separator closes whatever paragraph has been collected so far
        if pending:
            yield joiner(pending)
            pending = []
    # the last paragraph need not be followed by a separator
    if pending:
        yield joiner(pending)

def _entry_date(name):
    """Turn a file name starting with YYYYMMDD into e.g. '26th November 2003'."""
    months = {'01':'January', '02':'February', '03':'March', '04':'April', '05':'May', '06':'June',
              '07':'July', '08':'August', '09':'September', '10':'October', '11':'November', '12':'December'}
    iday = int(name[6:8])    # turn 08 into 8 etc
    # 11th-13th are the exceptions to the st/nd/rd rule; everything not
    # ending in 1, 2 or 3 takes 'th'
    if 11 <= iday % 100 <= 13:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(iday % 10, 'th')
    return '%d%s %s %s' % (iday, suffix, months[name[4:6]], name[0:4])


def process_entry(filename):
    """Return the html for one blog entry file.

    filename -- path of the entry; its base name must start with the entry
                date as YYYYMMDD (e.g. 20031126.txt)

    The file's text is wrapped as ``entry[date[<date>]<text>]``, code[]
    segments are escaped, the tags are expanded, and every resulting
    paragraph that does not already start with ``<`` is wrapped in <p>.

    Raises ValueError or KeyError if the date cannot be read from the file
    name.
    """
    print('processing  %s' % filename)
    name = os.path.basename(filename)
    # Work the date out before opening the file, so a badly named file does
    # not leave an open handle behind.
    date = _entry_date(name)

    blogfile = open(filename, 'r')
    try:
        content = blogfile.read()
    finally:
        blogfile.close()

    entry = 'entry[date[%s]%s]' % (date, content)
    lines = expand_string(escape_string(entry)).splitlines(True)

    chunks = []
    for para in paragraphs(lines):
        if para.startswith('<'):
            chunks.append(para)              # already html, leave as it is
        else:
            chunks.append('<p>%s</p>' % para)
    return ''.join(chunks)


def build_blogfiles(categories):
    """Write one html page per category/year, plus titles.css, under blogbase.

    categories -- dict mapping a category name or a year string to the list
                  of entry files that belong on that page (as returned by
                  find_entries)

    Every page gets the same right-hand pane of links to all the pages; keys
    made up only of digits are listed under "Archive", the rest under
    "Category".  Keys are handled in sorted order so the output is the same
    from run to run.  A heading image is generated for each page through
    make_heading_image().
    """
    #
    # Create the rhs html links pane for all the categories and the archive years
    #
    names = sorted(categories)
    archive_links = []
    category_links = []
    for cat in names:
        link = '<li><a href="%s" title="%s">%s</a></li>' % (bloghome + cat + '.html', cat, cat)
        if cat.isdigit():
            archive_links.append(link)
        else:
            category_links.append(link)
    archivepane = '<div id="archive"> <h2>Archive</h2> <ul>%s</ul> </div>' % ''.join(archive_links)
    categorypane = '<div id="category"><h2>Category</h2> <ul>%s</ul></div>' % ''.join(category_links)
    #
    # titles.css contains entries like
    # h1#titleimg2006 { background-image: url(images/titleimg2006.png); }
    #
    titles = open(blogbase+'/titles.css', 'w')
    try:
        for cat in names:
            #
            # create an image for each category and a reference in the titles.css
            #
            blogheading = "rcjp's blog - %s" % cat
            blogheadingID = 'titleimg' + cat             # can't have an id of '2006' since it's a number
            blogheadingImageFile = 'images/'+blogheadingID+'.png'
            make_heading_image(blogheading, blogheadingID)
            titles.write("h1#%s { background-image: url(%s); }\n" % (blogheadingID, blogheadingImageFile))

            page = open(blogbase+'/%s.html' % cat, 'w')
            try:
                page.write(blogheader)
                page.write('<h1 class="pageheading" id="%s"> <span>%s</span></h1>' %
                           (blogheadingID, blogheading))
                page.write(archivepane + categorypane)
                page.write('<div id="content">')
                # entries are written in the order they appear in the list
                for f in categories[cat]:
                    page.write(process_entry(f))
                page.write('</div>')
                page.write(blogfooter)
            finally:
                # close the page even if an entry fails to process
                page.close()
    finally:
        titles.close()

def find_entries(blogroot):
    '''Collect the .txt blog entries found in the directories below blogroot.

       Returns a dict of lists of full file paths.  Every entry is listed
       twice: under its category, which is the name of the directory holding
       the file, and under its year, which is the first four characters of
       the file name (e.g. 20031126.txt, i.e. YYYYMMDD).
       Directories called 'tmp' are not searched.'''
    found = {}
    for here, subdirs, names in os.walk(blogroot):
        # removing 'tmp' in place stops os.walk from descending into it
        if 'tmp' in subdirs:
            subdirs.remove('tmp')

        category = os.path.basename(here)
        for name in names:
            stem, suffix = os.path.splitext(name)
            if suffix != '.txt':
                continue
            entry_path = os.path.join(here, name)
            for key in (category, stem[0:4]):
                found.setdefault(key, []).append(entry_path)
    return found


# Script entry point: pick the base url for the links, find the entries under
# blogbase and regenerate every page.
if __name__ == '__main__':
    from optparse import OptionParser
    parser = OptionParser()
    # NOTE(review): action="store" makes -u/--upload require a value, which is
    # then only tested against None below; it looks like it was meant to be a
    # plain flag -- confirm before changing the command line.
    parser.add_option("-u", "--upload", action="store", dest="upload", help="upload to website")
    options, args = parser.parse_args()
    if args:
        parser.error("no arguments are allowed")

    # Links point at the local copy unless the pages are meant for the website.
    if options.upload is None:
        bloghome = 'file:///home/r/blog/'
    else:
        bloghome = 'http://www.codephrenic.co.uk/blog/'

    categories = find_entries(blogbase)
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print('Generating blog')
    print(16*'-')
    build_blogfiles(categories)

Advertisements

Leave a Comment »

No comments yet.

RSS feed for comments on this post.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

Blog at WordPress.com.

%d bloggers like this: