Source code for bkgen.docx


import os
from lxml import etree
from bl.dict import Dict
from bl.url import URL
from bxml import XML
import bxml.docx
from bkgen import NS
from bkgen.source import Source

[docs]class DOCX(bxml.docx.DOCX, Source): """A Word document can be a source that is brought in, and an output format"""
[docs] def document(self, fn=None, **params): """returns an XML document containing the content of the Word document""" from .converters.docx_document import DocxDocument converter = DocxDocument() doc = converter.convert(self, fn=fn or os.path.splitext(self.fn)[0]+'.xml', **params) return doc
# == Source Properties ==
[docs] def documents(self, **params): """return a list of documents containing the content of the document""" # just the one document return [self.document(**params)]
[docs] def images(self): """all the images referred to in the DOCX. """ from bf.image import Image images = [] rels = self.xml(src='word/_rels/document.xml.rels').root for img in self.xml().root.xpath("//html:img", namespaces=DOCX.NS): image = Image() link_rel = XML.find(rels, "//rels:Relationship[@Id='%s']" % img.get('data-link-id'), namespaces=DOCX.NS) embed_rel = XML.find(rels, "//rels:Relationship[@Id='%s']" % img.get('data-embed-id'), namespaces=DOCX.NS) if link_rel is not None: image.fn = URL(link_rel.get('Target')).path if embed_rel is not None: image.data = self.read('word/' + embed_rel.get('Target')) image.fn = os.path.join(self.path, img.attrib.pop('name')) else: image.data = open(image.fn, 'rb').read() images.append(image) return images
[docs] def metadata(self): """return a Metadata object with the metadata in the document""" from .metadata import Metadata xml = self.xml(src="docProps/core.xml", XMLClass=Metadata) xml.root.tag = "{%(pub)s}metadata" % NS return xml
[docs] def stylesheet(self): return super().stylesheet()
[docs] def numbering_params(self, numId, level): """return numbering parameters for the given w:numId an w:lvl / w:ilvl""" numbering = self.xml(src='word/numbering.xml') params = Dict(level=str(level)) num = XML.find(numbering.root, "//w:num[@w:numId='%s']" % numId, namespaces=self.NS) if num is not None: abstractNumId = XML.find(num, "w:abstractNumId/@w:val", namespaces=self.NS) if abstractNumId is not None: abstractNum = XML.find(numbering.root, "//w:abstractNum[@w:abstractNumId='%s']" % abstractNumId, namespaces=self.NS) if abstractNum is not None: lvl = XML.find(abstractNum, "w:lvl[@w:ilvl='%s']" % level, namespaces=self.NS) if lvl is not None: params['start'] = XML.find(lvl, "w:start/@w:val", namespaces=self.NS) params['numFmt'] = XML.find(lvl, "w:numFmt/@w:val", namespaces=self.NS) if params['numFmt'] == 'bullet': params['ul'] = True else: params['ol'] = True return params