#!/usr/bin/env python
#
# mallard2man.py
#
# Copyright (C) 2014 MongoDB, Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Name inserted into the copyright comment at the top of every generated page.
COPYRIGHT_HOLDER = "MongoDB, Inc."
# Project name: last field of the ".TH" title line, also named in the COLOPHON.
GROUP = "libbson"
# Bug tracker address printed in the COLOPHON section of every generated page.
BUG_URL = 'https://jira.mongodb.org/browse/CDRIVER'
"""
This script is meant to convert a fairly basic mallard format documentation
page to a groff styled man page.
"""
import os
import re
import sys
import codecs
from datetime import datetime
from xml.etree import ElementTree
# ElementTree reports namespaced tags as "{namespace-uri}local-name". The
# constants below are the fully qualified tag names this script compares
# element tags against.
_XINCLUDE_NS = '{http://www.w3.org/2001/XInclude}'
_MALLARD_NS = '{http://projectmallard.org/1.0/}'

INCLUDE = _XINCLUDE_NS + 'include'
TITLE = _MALLARD_NS + 'title'
SUBTITLE = _MALLARD_NS + 'subtitle'
SECTION = _MALLARD_NS + 'section'
INFO = _MALLARD_NS + 'info'
ITEM = _MALLARD_NS + 'item'
LISTING = _MALLARD_NS + 'listing'
LIST = _MALLARD_NS + 'list'
LINK = _MALLARD_NS + 'link'
LINKS = _MALLARD_NS + 'links'
SYNOPSIS = _MALLARD_NS + 'synopsis'
CODE = _MALLARD_NS + 'code'
P = _MALLARD_NS + 'p'
SCREEN = _MALLARD_NS + 'screen'
EM = _MALLARD_NS + 'em'
NOTE = _MALLARD_NS + 'note'
TABLE = _MALLARD_NS + 'table'
TR = _MALLARD_NS + 'tr'
TD = _MALLARD_NS + 'td'
OUTPUT = _MALLARD_NS + 'output'
# Matches "\" and "-", but not "\-".
replaceables = re.compile(r'(\\(?!-))|((? section element.
self.sections_map = {}
def _parse(self):
    """Parse the input page and build the child -> parent lookup table.

    Reads the XML file named by ``self.inFile`` and sets ``self.tree``,
    ``self.root`` and ``self.parent_map``.
    """
    self.tree = ElementTree.ElementTree()
    # Open in binary mode so the XML parser performs its own decoding, and
    # use a context manager so the handle is closed instead of leaked.
    with open(self.inFile, 'rb') as source:
        self.tree.parse(source)
    self.root = self.tree.getroot()

    # Python's standard ElementTree doesn't store an element's parent on
    # the element. Make a child->parent map.
    try:
        iterator = self.tree.iter()
    except AttributeError:
        # Python 2.6.
        iterator = self.tree.getiterator()
    self.parent_map = dict((c, p) for p in iterator for c in p)
def _get_parent(self, ele):
    """Return the parent element recorded for *ele* in ``self.parent_map``.

    Raises KeyError when *ele* has no entry in the map.
    """
    parents = self.parent_map
    return parents[ele]
def _extract(self):
    """Collect the title, subtitle and sections from the root's children.

    Sets ``self.title`` and ``self.subtitle``, appends every section child
    to ``self.sections`` and indexes sections carrying an ``id`` attribute
    in ``self.sections_map``.
    """
    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9.
    for child in self.root:
        if child.tag == TITLE:
            # A title like "Version Checks" can't have spaces, otherwise
            # the "whatis" entry can't be parsed from the man page title.
            self.title = child.text.strip().replace(' ', '_')
        elif child.tag == SUBTITLE:
            self.subtitle = child.text.strip()
        elif child.tag == SECTION:
            section_id = child.get('id')
            if section_id:
                self.sections_map[section_id] = child
            self.sections.append(child)

    if not self.subtitle and 'description' in self.sections_map:
        # No "subtitle" element, use the text of the "description" section
        # as the subtitle.
        self.subtitle = self._section_text(self.sections_map['description'])
def _section_text(self, section):
    """Return the text of the first child of *section* that is not a title.

    Returns None when the section has no such child.
    """
    # Skip title children; the first remaining child supplies the text.
    body = [node for node in section if node.tag != TITLE]
    if not body:
        return None
    return self._textify_elem(body[0])
def _textify_elem(self, elem):
    """Return all text inside *elem*, descendants included, stripped."""
    pieces = list(elem.itertext())
    joined = ''.join(pieces)
    return joined.strip()
def _writeComment(self, text=''):
    """Write *text* to the output file as groff comment lines.

    Each line of *text* is prefixed with the comment marker and written
    unescaped; an empty *text* yields a single empty comment line.
    """
    out = self.outFile
    for comment_line in text.split('\n'):
        for piece in ('.\\" ', comment_line, '\n'):
            out.write(piece)
def _escape_char(self, match):
    r"""Return the groff escape for the character matched by *match*.

    A backslash becomes "\e" and a hyphen becomes "\(hy". Any other
    matched text trips an assertion, since the caller's pattern is only
    expected to match those two characters.
    """
    char = match.group(0)
    if char == "\\":
        return "\\e"
    if char == "-":
        return r"\(hy"
    assert False, "invalid char passed to _escape_char: %r" % char
def _escape(self, text):
    r"""Return *text* with backslashes and hyphens escaped for groff.

    Every match of the module-level ``replaceables`` pattern is replaced
    through ``_escape_char``.
    """
    # Avoid "hyphen-used-as-minus-sign" lintian warning about man pages,
    # and escape text like "\0" as "\\0". All "-" become "\(hy", which is
    # an explicit hyphen, but the first line's "name \- description" text
    # is left alone.
    escaped = replaceables.sub(self._escape_char, text)
    return escaped
def _write(self, text):
    """Escape *text* for groff, then write it to the output file."""
    escaped = self._escape(text)
    self._write_noescape(escaped)
def _write_noescape(self, text):
    """Write *text* to the output file exactly as given, without escaping."""
    sink = self.outFile
    sink.write(text)
def _writeCommand(self, text):
    """Write *text* followed by a newline, both through ``_write``.

    Unlike ``_writeLine``, the text is neither stripped nor protected
    against a leading ".".
    """
    for chunk in (text, '\n'):
        self._write(chunk)
def _writeLine(self, text):
    """Write one line of body text, followed by a newline.

    *text* is stripped of surrounding whitespace first. When *text* is
    None only the newline is written.
    """
    if text is None:
        self._write('\n')
        return
    line = text.strip()
    if line.startswith('.'):
        # A line starting with "." would be read by groff as a request;
        # prefixing "\&" makes it ordinary text.
        line = '\\&' + line
    self._write(line)
    self._write('\n')
def _generateHeader(self):
    """Write the page preamble.

    Emits the copyright and license notice as groff comments, then the
    ".TH" title line (dated with the input file's modification time) and
    the NAME section built from the page title and subtitle.
    """
    this_year = datetime.utcnow().year
    copyright_notice = 'This manpage is Copyright (C) %s %s' % (this_year, COPYRIGHT_HOLDER)
    license_notice = "\n".join([
        "Permission is granted to copy, distribute and/or modify this document",
        "under the terms of the GNU Free Documentation License, Version 1.3",
        "or any later version published by the Free Software Foundation;",
        "with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.",
        "A copy of the license is included in the section entitled \"GNU",
        "Free Documentation License\".",
    ])
    for comment in (copyright_notice, '', license_notice, ''):
        self._writeComment(comment)

    modified = int(os.stat(self.inFile).st_mtime)
    date = datetime.fromtimestamp(modified).strftime('%Y-%m-%d')
    heading = self.title.replace('()', '').upper()
    self._write('.TH "%s" "%s" "%s" "%s"\n' % (heading, self.section, date, GROUP))
    self._write('.SH NAME\n')
    # Written unescaped so the "\-" separator of the NAME line survives.
    self._write_noescape('%s \\- %s\n' % (self.title, self.subtitle))
def _generateSection(self, section):
    """Render *section*: its title as an ".SH" heading, then its children.

    Text trailing each child element (its ``tail``) is written as a line
    of body text after the child has been rendered.
    """
    # Element.getchildren() was removed in Python 3.9; take a snapshot of
    # the children by iterating the element itself.
    children = list(section)

    # Try to render the title first
    for child in children:
        if child.tag == TITLE:
            heading = child.text.strip().upper()
            self._writeCommand('.SH "%s"' % heading)

    for child in children:
        self._generateElement(child)
        if child.tail:
            self._writeLine(child.tail)
def _generateSynopsis(self, synopsis):
    """Render the children of *synopsis* between ".nf" and ".fi" requests.

    Text trailing each child (its ``tail``) is written after the child.
    """
    self._writeCommand('.nf')
    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9.
    for child in synopsis:
        self._generateElement(child)
        if child.tail:
            self._writeLine(child.tail)
    self._writeCommand('.fi')
def _generateCode(self, code):
    """Render a code element, either inline in bold or as a no-fill block.

    Single-line text outside a synopsis is written as a ".B" request, with
    "()" expanded to "(<man section>)". Anything else (no text, multi-line
    text, or a parent whose tag ends in "synopsis") is written between
    ".nf" and ".fi", followed by the element's rendered children.
    """
    text = code.text
    is_synopsis = self._get_parent(code).tag.endswith('synopsis')
    if text and '\n' not in text and not is_synopsis:
        text = text.replace('()', '(%s)' % self.section)
        self._writeCommand('.B ' + text)
        return

    self._writeCommand('.nf')
    self._writeLine(text)
    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9.
    # NOTE(review): child tails are not written here, unlike _generateP;
    # confirm that dropping them is intended.
    for child in code:
        self._generateElement(child)
    self._writeCommand('.fi')
def _generateNote(self, note):
    """Render *note* as a bold "NOTE" label followed by an indented block.

    The children, and any text trailing each child, are written between
    ".RS" and ".RE" requests.
    """
    self._writeCommand('.B NOTE')
    self._writeCommand('.RS')
    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9.
    for child in note:
        self._generateElement(child)
        if child.tail:
            self._writeLine(child.tail)
    self._writeCommand('.RE')
def _generateP(self, p):
    """Render paragraph *p*: leading text, then each child and its tail."""
    if p.text:
        self._writeLine(p.text)
    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9.
    for child in p:
        self._generateElement(child)
        if child.tail:
            self._writeLine(child.tail)
def _generateScreen(self, screen):
    """Render every child element of *screen*, in document order.

    Only child elements are rendered; the element's own text and the
    children's tails are not written.
    """
    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9.
    for child in screen:
        self._generateElement(child)
def _generateListing(self, listing):
    """Render every child element of *listing*, in document order."""
    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9.
    for child in listing:
        self._generateElement(child)
def _generateList(self, l):
    """Render every child element of the list element *l*, in order."""
    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9.
    for child in l:
        self._generateElement(child)
def _generateEM(self, em):
    """Render the text of emphasis element *em* in bold (".B").

    An element without text produces no output, matching
    ``_generateLink``; formatting ``None`` would otherwise put the literal
    word "None" into the page.
    """
    text = em.text
    if text:
        self._writeCommand('.B %s' % text)
def _generateOutput(self, output):
    """Render an output element exactly as a code element is rendered."""
    render_as_code = self._generateCode
    render_as_code(output)
def _generateItem(self, item):
    """Render list *item* as a bulleted, indented paragraph (".IP")."""
    self._writeCommand('.IP \\[bu] 2')
    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9.
    for child in item:
        self._generateElement(child)
def _generateElement(self, ele):
    """Render *ele* with the handler that matches its tag.

    Titles are skipped here because ``_generateSection`` emits them. An
    XInclude element is replaced by the UTF-8 text of the file named by
    its ``href`` attribute, resolved against ``self.relpath``. Elements
    with an unrecognised tag are reported on stdout and otherwise ignored.

    Raises KeyError when an XInclude element has no ``href`` attribute.
    """
    tag = ele.tag

    if tag == TITLE:
        return

    if tag == INCLUDE:
        path = os.path.join(self.relpath, ele.attrib['href'])
        # Close the included file deterministically instead of leaking
        # the handle.
        with codecs.open(path, 'r', encoding='utf-8') as included:
            contents = included.read()
        self._writeLine(contents)
        return

    handlers = {
        SECTION: self._generateSection,
        SYNOPSIS: self._generateSynopsis,
        CODE: self._generateCode,
        OUTPUT: self._generateOutput,
        P: self._generateP,
        EM: self._generateEM,
        LISTING: self._generateListing,
        ITEM: self._generateItem,
        LIST: self._generateList,
        SCREEN: self._generateScreen,
        LINK: self._generateLink,
        NOTE: self._generateNote,
        TABLE: self._generateTable,
        TR: self._generateTr,
        TD: self._generateTd,
    }
    handler = handlers.get(tag)
    if handler is None:
        print('unknown element type %s' % ele)
    else:
        handler(ele)
def _generateTable(self, table):
    """Render every child element of *table*, in document order."""
    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9.
    for child in table:
        self._generateElement(child)
def _generateTr(self, tr):
    """Render table row *tr* as a tagged paragraph.

    Writes ".TP" and ".B", renders the row's children, then writes ".LP".
    """
    self._writeCommand('.TP')
    self._writeCommand('.B')
    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9.
    for child in tr:
        self._generateElement(child)
    self._writeCommand('.LP')
def _generateTd(self, td):
    """Render every child element of table cell *td*, in document order."""
    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9.
    for child in td:
        self._generateElement(child)
def _generateLink(self, link):
    """Render the text of *link* in bold; a link without text is skipped.

    "()" in the text is expanded to "(<man section>)".
    """
    label = link.text
    if not label:
        return
    if '()' in label:
        label = label.replace('()', '(%s)' % self.section)
    self._writeCommand('.B ' + label)
def _generateSections(self):
    """Render every element of ``self.sections``, in order."""
    pending = iter(self.sections)
    for current in pending:
        self._generateElement(current)
def _generateFooter(self):
    """Write the closing COLOPHON section.

    Names the project (``GROUP``) and the bug tracker (``BUG_URL``). Each
    piece begins with its own newline, so the output does not end in one.
    """
    footer_parts = [
        '\n.B',
        '\n.SH COLOPHON',
        '\nThis page is part of %s.' % GROUP,
        '\nPlease report any bugs at %s.' % BUG_URL.replace('-','\\-'),
    ]
    for part in footer_parts:
        self._write(part)
def _generate(self):
    """Write the man page to a temporary file, then move it into place.

    On entry ``self.outFile`` is the destination path. It is saved as
    ``self.realname`` and ``self.outFile`` is rebound to a UTF-8 writer
    for ``<destination>.tmp``. The temporary file is renamed over the
    destination only when every generation step succeeded.
    """
    self.realname = self.outFile
    tmp_name = self.outFile + '.tmp'
    self.outFile = codecs.open(tmp_name, 'w', encoding='utf-8')
    try:
        self._generateHeader()
        self._generateSections()
        self._generateFooter()
    finally:
        # Always release the handle, and flush before the rename below:
        # renaming a file that is still open fails on some platforms.
        self.outFile.close()
    os.rename(tmp_name, self.realname)
def convert(self):
    """Run the conversion: parse the input, extract metadata, write output."""
    for step in ('_parse', '_extract', '_generate'):
        getattr(self, step)()
def main(filenames, section='3'):
    """Convert each Mallard page in *filenames* to a man page.

    The output for ``<dir>/<name>.<ext>`` is ``<dir>/man/<name>.<section>``.

    filenames -- iterable of input page paths.
    section   -- man section, used as the output suffix and passed on to
                 the converter.
    """
    for inFile in filenames:
        # Build the path with os.path.join: concatenating '/man/' onto an
        # empty dirname (input given without a directory part) would point
        # at the absolute path "/man/".
        manDir = os.path.join(os.path.dirname(inFile), 'man')
        baseFile = os.path.splitext(os.path.basename(inFile))[0]
        outFile = os.path.join(manDir, baseFile + '.' + section)
        c = Convert(inFile, outFile, section)
        c.convert()
# Script entry point: mallard2man.py SECTION FILENAMES...
if __name__ == '__main__':
    # Both a man section and at least one input page are required.
    if len(sys.argv) >= 3:
        section = sys.argv[1]
        main(sys.argv[2:], section)
        sys.exit(0)
    print('usage: %s SECTION FILENAMES...' % sys.argv[0])
    sys.exit(1)