"""Turn the HTML files of a book into front-matter-prefixed handbook pages.

The manifest ``PREFIX + MANIFEST`` is a JSON document whose ``"spine"`` entry
lists page names (without the ``.html`` suffix).  For each name the matching
file under ``PREFIX`` is parsed, its headings are demoted by one level, and the
result is rendered through ``TEMPLATE`` into ``<name>.html``.
"""
import json

from lxml import html
# NOTE(review): Markup is not used anywhere in this file, and recent Jinja2
# releases no longer export it (it lives in markupsafe) -- confirm before
# upgrading Jinja2.
from jinja2 import Template, Markup

PREFIX = 'book/'
MANIFEST = 'info.json'

# Front matter (title + layout) followed by the converted page body.
TEMPLATE = """---
title: {{title}}
layout: handbook
---
{{body}}
"""


def rename_tag(doc, src, dst):
    """Rename every ``src`` element below ``doc`` to ``dst``, in place.

    Args:
        doc: Element (or tree root) to search; it is modified in place.
        src: Tag name to look for, e.g. ``'h1'``.
        dst: Tag name assigned to every match, e.g. ``'h2'``.
    """
    for tag in doc.findall('.//' + src):
        tag.tag = dst


def _heading_text(heading):
    """Return the stripped leading text of ``heading``, or ``''`` if absent.

    ``heading.text`` is ``None`` when the element is empty or starts with a
    child element, so it is guarded as well as the element itself.
    """
    if heading is None:
        return ''
    return (heading.text or '').strip()


def produce():
    """Render every page listed in the manifest's ``"spine"``.

    Reads ``PREFIX + MANIFEST``, then for each spine entry reads
    ``PREFIX + name + '.html'`` (decoded as UTF-8) and writes the rendered
    page, UTF-8 encoded, to ``name + '.html'`` -- note that the output path
    does not include ``PREFIX``.  The title of each page is printed as a
    one-element list while processing.

    Raises:
        IOError/OSError: if the manifest or a page cannot be read or written.
        KeyError: if the manifest has no ``"spine"`` entry.
    """
    with open(PREFIX + MANIFEST, 'rb') as manifest:
        info = json.loads(manifest.read().decode('utf-8'))

    # Compile the template once instead of once per page.
    page = Template(TEMPLATE)

    for file_name in info["spine"]:
        with open(PREFIX + file_name + '.html', 'rb') as source:
            data = source.read()
        doc = html.document_fromstring(data.decode('utf-8'))

        # The page is embedded in a layout, so <body> becomes a plain <div>.
        body = doc.find('.//body')
        body.tag = 'div'

        # Must be read before the headings are demoted below.
        title = _heading_text(doc.find('.//h1'))
        print([title])

        # Demote from the deepest level upwards; going the other way would
        # push a freshly renamed heading down again on the next pass.
        rename_tag(doc, 'h4', 'h5')
        rename_tag(doc, 'h3', 'h4')
        rename_tag(doc, 'h2', 'h3')
        rename_tag(doc, 'h1', 'h2')

        context = {
            "title": title,
            # Ask for text rather than bytes: a bytes value would be rendered
            # by the template as its repr (b'...') on Python 3.
            "body": html.tostring(body, encoding='unicode'),
            # Passed through for templates that want it; TEMPLATE above does
            # not reference it.
            "toc": info.get("TOC"),
        }
        content = page.render(context)

        with open(file_name + '.html', 'wb') as out:
            out.write(content.encode('utf-8'))


if __name__ == "__main__":
    produce()