#!/usr/bin/python2.2
import re
# from ruby's mombo:
def escape(x):
    """Return *x* with the HTML-significant characters &, < and > escaped.

    Quote characters are left untouched, so the result is safe as element
    content but not as an attribute value.
    """
    # '&' has to go first; otherwise the ampersands introduced by the
    # '&lt;' / '&gt;' replacements would themselves be escaped again.
    return x.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')


def sanitize(body):
    """Escape all markup in *body*, then re-enable simple ``<a href>`` links.

    The text is first passed through escape(), so every tag arrives here in
    its ``&lt;...&gt;`` form.  Anchors of the exact shape
    ``<a href="URL">text</a>`` (double- or single-quoted URL, link text
    containing no '&') are turned back into real anchors, always emitted
    with a double-quoted href.  Everything else stays escaped.

    Returns the sanitized string.
    """
    body = escape(body)
    # passthru <a href>, <em>, <i>, <b>, <blockquote>, <br/>, <p>
    # (only the <a href> passthrough is implemented so far)
    # NOTE(review): the URL scheme is not validated, so "javascript:" hrefs
    # pass through -- restrict schemes before using this on untrusted input.
    body = re.sub(r'&lt;a href="([^"]*)"&gt;([^&]*)&lt;/a&gt;',
                  r'<a href="\1">\2</a>', body)
    # The single-quoted form is re-emitted inside double quotes, so a '"' in
    # the URL would terminate the attribute early; refuse to match it.
    body = re.sub(r"&lt;a href='([^'\"]*)'&gt;([^&]*)&lt;/a&gt;",
                  r'<a href="\1">\2</a>', body)
    return body
def handle(x):
    """Apply inline (phrase-level) markup to *x* and return the HTML text.

    Performed in order:
      1. literal replacements from the table below (typographic quotes and
         dashes, line-break handling);
      2. ``_text_``  -> ``<em>text</em>``;
      3. ``*text*``  -> ``<strong>text</strong>``;
      4. ``|text|``  -> ``<code>text</code>``.

    The typographic replacements are UTF-8 byte sequences spelled as \\x
    escapes, which keeps the source pure ASCII and matches the Python 2
    byte-string model this script targets.
    """
    # Order matters: a longer token must be replaced before any token that
    # is a prefix/substring of it ("``" before "`", "---" before "--",
    # " \n" before "\n").
    rd = (
        ("``", '\xe2\x80\x9c'),   # U+201C left double quotation mark
        ("''", '\xe2\x80\x9d'),   # U+201D right double quotation mark
        ("`", '\xe2\x80\x98'),    # U+2018 left single quotation mark
        ("'", '\xe2\x80\x99'),    # U+2019 right single quotation mark
        ('---', '\xe2\x80\x93'),  # U+2013 en dash
        ('--', '\xe2\x80\x94'),   # U+2014 em dash
        (' \n', ' '),             # space before newline: join the lines
        ('\n', '<br />\n'),       # any other newline: explicit line break
    )
    # @@ these two are sorta backwards, but huffman coded...
    #    (the two dash entries: the shorter "--" gets the em dash)
    # @@ need some way to escape and ignore in <code>/<pre>
    for k, v in rd:
        x = x.replace(k, v)
    # '_' counts as a word character, so a delimiter that touches whitespace
    # or the string edge sits ON a word boundary (\b) ...
    x = re.sub(r'\b_(.*?)_\b', r'<em>\1</em>', x)
    # ... whereas '*' and '|' are non-word characters, so in the same
    # position there is NO boundary and \B is the assertion that holds.
    x = re.sub(r'\B\*(.*?)\*\B', r'<strong>\1</strong>', x)
    x = re.sub(r'\B\|(.*?)\|\B', r'<code>\1</code>', x)
    return x
def _atx_list(p):
    """Render one list paragraph as ``<ul>`` (bulleted) or ``<ol>`` (numbered).

    *p* is a paragraph whose first non-blank character is '*' or a digit.
    Lines that do not start a new item are folded into the preceding item.
    """
    #@@ should really do <li><p> for paragraphed lists
    if p.strip()[0] == '*':
        mode = 'ul'
    else:
        mode = 'ol'
    lines = p.split('\n')
    # Start at 1: the first line always opens an item, even when its bullet
    # is indented further than the continuation test below recognises.
    li = 1
    while li < len(lines):
        l = lines[li]
        # startswith() rather than l[0] so an empty line cannot raise.
        if mode == 'ul':
            starts_item = l.startswith('*') or l.startswith(' *')
        else:
            starts_item = re.match(r'^ *\d+\.', l) is not None
        if starts_item:
            li += 1
        else:
            # Continuation line: append it to the previous item and re-test
            # whatever line slides into this index.
            del lines[li]
            lines[li-1] = lines[li-1] + l
    items = []
    for l in lines:
        l = l.strip()
        if mode == 'ul' and l.startswith('*'):
            l = l[1:]
        else:
            l = re.sub(r'^ *\d+\.', '', l)
        items.append(' <li>' + handle(l.strip()) + '</li>')
    return '<' + mode + '>\n' + '\n'.join(items) + '\n</' + mode + '>'


def atx(x, full=1):
    """Convert ATX-style plain text *x* to HTML.

    The text is split into paragraphs on blank lines; each paragraph becomes
    one block element:

      * starts with '$', or follows a paragraph ending in '::' -> ``<pre>``
      * starts with one or more '#' and a space -> ``<hN>`` (N = number of #)
      * starts with '*' or 'N.' -> ``<ul>`` / ``<ol>``
      * starts with three spaces -> ``<blockquote>``
      * anything else -> ``<p>``

    If *full* is true the result is wrapped in a complete XHTML document
    whose ``<title>`` is the text of the last level-1 heading (empty if
    there is none); otherwise only the body fragment is returned.

    The input is not HTML-escaped (the sanitize() call is disabled), and
    ``<pre>`` content is emitted verbatim.
    """
    #x = sanitize(x)
    if x and x[-1] == "\n":
        x = x[:-1]  # trim closing \n if exists
    paras = x.split('\n\n')  #@@ requires all in memory
    nextp, title = None, ''
    for i in range(len(paras)):
        p = paras[i]
        if p == '':
            continue  # blank line
        elif p[0] == '$' or nextp == 'pre':  # <pre>
            p = "<pre>" + p + "</pre>"
            nextp = None
        elif re.match(r'^\#+ ', p):  # <h?>
            n = 0
            while p[n] == '#':
                n += 1
            if n == 1:
                title = p[n:].strip()
            tag = "h" + str(n)
            p = "<" + tag + ">" + handle(p[n:].strip()) + "</" + tag + ">"
        elif re.match(r'^ *(\*|\d+\.) ', p):  # <ul>/<ol>
            p = _atx_list(p)
        # Three leading spaces: the slice is three characters wide, so the
        # literal it is compared against must be too.
        elif p[:3] == '   ':  # <blockquote>
            p = "<blockquote>" + handle(p) + "</blockquote>"
        else:  # <p>
            if p[-2:] == '::':
                # "text::" announces a literal block: keep one colon and
                # render the next paragraph as <pre>.
                nextp = "pre"
                p = p[:-1]
            p = "<p>" + handle(p) + "</p>"
        paras[i] = p
    doc = '\n\n'.join(paras)
    if full:
        doc = ('<html xmlns="http://www.w3.org/1999/xhtml"><head>\n'
               + '  <title>' + title + '</title>\n'
               + '  <link rel="stylesheet" type="text/css" href="/style.css" />\n'
               + '</head><body>\n'
               + doc
               + '\n</body></html>')
    return doc
# Command-line entry point: convert the file named by the first argument
# (or standard input when the argument is missing or "-") and write the
# resulting HTML document to standard output.
if __name__ == "__main__":
    import sys
    if len(sys.argv) <= 1 or sys.argv[1] == "-":
        text = sys.stdin.read()
    else:
        f = open(sys.argv[1])
        try:
            text = f.read()
        finally:
            # Close the input even if reading fails.
            f.close()
    # write() parses under both Python 2 and 3 and, like the trailing-comma
    # print it replaces, appends no newline of its own.
    sys.stdout.write(atx(text))
"""
TODO: smarter pants, generalized phrasals, prime characters (4'3", 80's)
"""