Source code for matrixb.source.xlsx
# Copyright (c) 2019-2020 Kevin Crouse
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# @license: http://www.apache.org/licenses/LICENSE-2.0
# @author: Kevin Crouse (krcrouse@gmail.com)
from .base import SourceBase
import os
[docs]class XLSX(SourceBase):
def __init__(self, *args, **kwargs):
self._iterator = None
self.workbook = None
super().__init__(*args, **kwargs)
[docs] def open_stream(self):
import openpyxl
self.workbook = openpyxl.load_workbook(os.path.expanduser(self.filename), read_only=True, data_only=True)
self._worksheets = self.workbook.sheetnames
if self.worksheet:
self.xlsheet = self.workbook[self.worksheet]
else:
self.xlsheet = self.workbook.worksheets[0] ### this could be self.workbook.active, but that has given weird results
self.worksheet = self.xlsheet.title
self._iterator = iter(self.xlsheet.rows)
@property
def iterator(self):
if not self._iterator:
self.open()
return(self._iterator)
@property
def worksheets(self):
if not self.workbook:
self.open()
return(self._worksheets)
@property
def worksheet(self): return(self._worksheet)
@worksheet.setter
def worksheet(self, worksheet):
self._worksheet = worksheet
self.xlsheet = self.workbook[worksheet]
[docs] def skip_rows(self, count):
skipped = []
for i in range(count):
row = next(self.iterator)
skipped.append(list(map(lambda cell: cell.value, row)))
return(skipped)
[docs] def next_row(self):
hasText = False
while True:
try:
rowcontainer = next(self.iterator)
except StopIteration:
self.workbook.close()
raise(StopIteration)
row = []
cellcount = 0
for cell in rowcontainer:
cellcount += 1
v = cell.value
if type(v) is str:
if self.nonemptyre.search(v):
hasText = cellcount
else:
v = None
elif v is not None:
hasText = cellcount
else:
v = None
# add the cell value to the row
row.append(v)
if hasText is not False:
return(row[:hasText])
[docs] @classmethod
def export_to(self, matrix, filename, topmatter=None, autosize=None):
"""The export_to class method to export a matrixb matrix to a modern Excel ('.xlsx') file.
Args:
matrix (matrixb.Matrix): The Matrix object to export.
filename (str): The full path to send the file.
topmatter (list|str, optional): Lines to appear above the exported table.
autosize (bool): TODO.
"""
import openpyxl
wb = openpyxl.Workbook()
ws = wb.active
irow = 0
# handle topmatter
if topmatter:
if type(topmatter) in (list, tuple):
for rowdata in topmatter:
ws.append(rowdata)
irow += 1
else:
ws.append([topmatter])
irow += 1
# handle column headers, with some formatting
headerborder = openpyxl.styles.Border(bottom=openpyxl.styles.Side(border_style='thick',color='000000'))
headerfont = openpyxl.styles.Font(bold=True, size=12)
for icol in range(len(matrix.columns)):
cell = ws.cell(irow+1, icol+1)
cell.value = matrix.columns[icol]
cell.font = headerfont
cell.border = headerborder
irow += 1
for rowdata in matrix:
ws.append(rowdata)
wb.save(filename)
def __getstate__(self):
""" To pickle/serialze the csv source, we delete the stream and filehandle - this will allow future restored objects to get things like the source filename, but attempts to access the source object will fail. """
state = super().__getstate__()
del state['workbook']
del state['xlsheet']
del state['_iterator']
return(state)