Source code for bkgen.docx
import os
from lxml import etree
from bl.dict import Dict
from bl.url import URL
from bxml import XML
import bxml.docx
from bkgen import NS
from bkgen.source import Source
[docs]class DOCX(bxml.docx.DOCX, Source):
"""A Word document can be a source that is brought in, and an output format"""
[docs] def document(self, fn=None, **params):
"""returns an XML document containing the content of the Word document"""
from .converters.docx_document import DocxDocument
converter = DocxDocument()
doc = converter.convert(self, fn=fn or os.path.splitext(self.fn)[0]+'.xml', **params)
return doc
# == Source Properties ==
[docs] def documents(self, **params):
"""return a list of documents containing the content of the document"""
# just the one document
return [self.document(**params)]
[docs] def images(self):
"""all the images referred to in the DOCX.
"""
from bf.image import Image
images = []
rels = self.xml(src='word/_rels/document.xml.rels').root
for img in self.xml().root.xpath("//html:img", namespaces=DOCX.NS):
image = Image()
link_rel = XML.find(rels, "//rels:Relationship[@Id='%s']" % img.get('data-link-id'), namespaces=DOCX.NS)
embed_rel = XML.find(rels, "//rels:Relationship[@Id='%s']" % img.get('data-embed-id'), namespaces=DOCX.NS)
if link_rel is not None:
image.fn = URL(link_rel.get('Target')).path
if embed_rel is not None:
image.data = self.read('word/' + embed_rel.get('Target'))
image.fn = os.path.join(self.path, img.attrib.pop('name'))
else:
image.data = open(image.fn, 'rb').read()
images.append(image)
return images
[docs] def stylesheet(self):
return super().stylesheet()
[docs] def numbering_params(self, numId, level):
"""return numbering parameters for the given w:numId an w:lvl / w:ilvl"""
numbering = self.xml(src='word/numbering.xml')
params = Dict(level=str(level))
num = XML.find(numbering.root, "//w:num[@w:numId='%s']" % numId, namespaces=self.NS)
if num is not None:
abstractNumId = XML.find(num, "w:abstractNumId/@w:val", namespaces=self.NS)
if abstractNumId is not None:
abstractNum = XML.find(numbering.root, "//w:abstractNum[@w:abstractNumId='%s']" % abstractNumId, namespaces=self.NS)
if abstractNum is not None:
lvl = XML.find(abstractNum, "w:lvl[@w:ilvl='%s']" % level, namespaces=self.NS)
if lvl is not None:
params['start'] = XML.find(lvl, "w:start/@w:val", namespaces=self.NS)
params['numFmt'] = XML.find(lvl, "w:numFmt/@w:val", namespaces=self.NS)
if params['numFmt'] == 'bullet':
params['ul'] = True
else:
params['ol'] = True
return params