diff --git a/horror/cell_starting_with_a_space.ods b/horror/cell_starting_with_a_space.ods
new file mode 100644
index 0000000..a0cd364
Binary files /dev/null and b/horror/cell_starting_with_a_space.ods differ
diff --git a/horror/simple.fods b/horror/simple.fods
new file mode 100644
index 0000000..0aed0bf
--- /dev/null
+++ b/horror/simple.fods
@@ -0,0 +1,273 @@
+
+
+
+ 2013-07-08T13:31:432013-07-19T10:14:48P0D3LibreOffice/5.3.4.2$Windows_x86 LibreOffice_project/f82d347ccc0be322489bf7da61d7e4ad13fe2ff3
+
+
+ 0
+ 0
+ 6321
+ 1781
+
+
+ view1
+
+
+ 0
+ 4
+ 0
+ 0
+ 0
+ 0
+ 2
+ 0
+ 0
+ 0
+ 0
+ 0
+ 64
+ 60
+ true
+ false
+
+
+ Sheet1
+ 1856
+ 0
+ 64
+ 60
+ false
+ true
+ true
+ true
+ 12632256
+ true
+ true
+ true
+ true
+ false
+ false
+ false
+ 1000
+ 1000
+ 1
+ 1
+ true
+ false
+
+
+
+
+ true
+ true
+ true
+ true
+ 12632256
+ true
+ false
+ true
+ 3
+ true
+ false
+ false
+ 1000
+ 1000
+ 1
+ 1
+ true
+ true
+ true
+ Microsoft Print to PDF
+ FRb+/01pY3Jvc29mdCBQcmludCB0byBQREYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATWljcm9zb2Z0IFByaW50IFRvIFBERgAAAAAAAAAAAAAWAAEAMhUAAAAAAAAIAFZUAAAkbQAAM1ROVwYATQBpAGMAcgBvAHMAbwBmAHQAIABQAHIAaQBuAHQAIAB0AG8AIABQAEQARgAAAAAAAAAAAAAAAAAAAAAAAAAAAAEEAwbcAFAUAy8BAAEAAQDqCm8IZAABAA8AWAICAAEAWAIDAAEATABlAHQAdABlAHIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAABAAAAAgAAAAEAAAD/////R0lTNAAAAAAAAAAAAAAAAERJTlUiAMgAJAMsET9de34AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQAAAAAACQABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADIAAAAU01USgAAAAAQALgAewAwADgANABGADAAMQBGAEEALQBFADYAMwA0AC0ANABEADcANwAtADgAMwBFAEUALQAwADcANAA4ADEANwBDADAAMwA1ADgAMQB9AAAAUkVTRExMAFVuaXJlc0RMTABQYXBlclNpemUATEVUVEVSAE9yaWVudGF0aW9uAFBPUlRSQUlUAFJlc29sdXRpb24AUmVzT3B0aW9uMQBDb2xvck1vZGUAQ29sb3IAAAAAAAAAAAAAAAAAACwRAABWNERNAQAAAAAAAACcCnAiHAAAAOwAAAADAAAA+gFPCDTmd02D7gdIF8A1gdAAAABMAAAAAwAAAAAIAAAAAAAAAAAAAAMAAAAACAAAKgAAAAAIAAADAAAAQAAAAFYAAAAAEAAARABvAGMAdQBtAGUAbgB0AFUAcwBlAHIAUABhAHMAcwB3AG8AcgBkAAAARABvAGMAdQBtAGUAbgB0AE8AdwBuAGUAcgBQAGEAcwBzAHcAbwByAGQAAABEAG8AYwB1AG0AZQBuAHQAQwByAHkAcAB0AFMAZQBjAHUAcgBpAHQAeQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAASAENPTVBBVF9EVVBMRVhfTU9ERRMARHVwbGV4TW9kZTo6VW5rbm93bg==
+ 0
+ false
+ true
+ true
+ false
+ false
+ false
+ 7
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ £
+
+
+
+
+ -
+ £
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ /
+
+ /
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ???
+
+
+
+ Page 1
+
+
+
+
+
+
+ ??? (???)
+
+
+ 00/00/0000, 00:00:00
+
+
+
+
+ Page 1 / 99
+
+
+
+
+
+
+
+
+
+
+
+ Name
+
+
+ Age
+
+
+ When
+
+
+
+
+ Bob
+
+
+ 20
+
+
+ 10/10/10
+
+
+
+
+ Jane
+
+
+ 23
+
+
+ 01/01/12
+
+
+
+
+ Ian
+
+
+ 34
+
+
+ 11/11/15
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/messytables/__init__.py b/messytables/__init__.py
index e2c03b9..f1f4137 100644
--- a/messytables/__init__.py
+++ b/messytables/__init__.py
@@ -9,6 +9,7 @@
from messytables.core import Cell, TableSet, RowSet, seekable_stream
from messytables.commas import CSVTableSet, CSVRowSet
from messytables.ods import ODSTableSet, ODSRowSet
+from messytables.fods import FODSTableSet, FODSRowSet
from messytables.excel import XLSTableSet, XLSRowSet
# XLSXTableSet has been deprecated and its functionality is now provided by
diff --git a/messytables/any.py b/messytables/any.py
index fd9dfc5..1c9290b 100644
--- a/messytables/any.py
+++ b/messytables/any.py
@@ -1,5 +1,5 @@
from messytables import (ZIPTableSet, PDFTableSet, CSVTableSet, XLSTableSet,
- HTMLTableSet, ODSTableSet)
+ HTMLTableSet, ODSTableSet, FODSTableSet)
import messytables
import re
@@ -39,6 +39,7 @@ def TABTableSet(fileobj):
'HTML': HTMLTableSet,
'CSV': CSVTableSet,
'ODS': ODSTableSet,
+ 'FODS': FODSTableSet,
'PDF': PDFTableSet}
@@ -107,7 +108,8 @@ def guess_ext(ext):
'xlsm': 'XLS',
'xltx': 'XLS',
'xltm': 'XLS',
- 'ods': 'ODS'}
+ 'ods': 'ODS',
+ 'fods': 'FODS'}
if ext in lookup:
return lookup.get(ext, None)
diff --git a/messytables/fods.py b/messytables/fods.py
new file mode 100644
index 0000000..bf568cf
--- /dev/null
+++ b/messytables/fods.py
@@ -0,0 +1,215 @@
+import io
+import re
+
+from lxml import etree
+
+from messytables.core import RowSet, TableSet, Cell
+from messytables.types import (StringType, DecimalType,
+ DateType, BoolType, CurrencyType,
+ TimeType, PercentageType)
+
+
+# Byte-level patterns used to slice the raw document into sheets and rows
+# without building a complete XML tree.
+# NOTE(review): the element names inside the three tag patterns below are
+# reconstructed for flat ODF (root <office:document>, sheets <table:table>,
+# rows <table:table-row>); without them the patterns only ever capture an
+# empty group. Confirm against a real .fods sample.
+FODS_NAMESPACES_TAG_MATCH = re.compile(
+    b"(<office:document[^>]*>)", re.MULTILINE)
+ODS_TABLE_MATCH = re.compile(
+    b".*?(<table:table.*?</.*?:table>).*?", re.DOTALL)
+# Applied with .match() to a sheet fragment; "." does not cross newlines
+# here, so the name has to sit on the fragment's first line.
+ODS_TABLE_NAME = re.compile(b'.*?table:name=\"(.*?)\".*?')
+ODS_ROW_MATCH = re.compile(
+    b".*?(<table:table-row.*?</.*?:table-row>).*?", re.DOTALL)
+
+# XML namespace URIs, later combined with local names into "{uri}local" tags.
+NS_OPENDOCUMENT_PTTN = u"urn:oasis:names:tc:opendocument:xmlns:%s"
+NS_CAL_PTTN = u"urn:org:documentfoundation:names:experimental:calc:xmlns:%s"
+NS_OPENDOCUMENT_TABLE = NS_OPENDOCUMENT_PTTN % "table:1.0"
+NS_OPENDOCUMENT_OFFICE = NS_OPENDOCUMENT_PTTN % "office:1.0"
+
+# Local element / attribute names looked up on each cell.
+TABLE_CELL = 'table-cell'
+VALUE_TYPE = 'value-type'
+COLUMN_REPEAT = 'number-columns-repeated'
+EMPTY_CELL_VALUE = ''
+
+# Maps an office:value-type to the office:* attribute holding the raw value.
+ODS_VALUE_TOKEN = {
+    "float": "value",
+    "date": "date-value",
+    "time": "time-value",
+    "boolean": "boolean-value",
+    "percentage": "value",
+    "currency": "value"
+}
+
+# Maps an office:value-type to the messytables type attached to the cell.
+# 'string' and 'currency' are absent because _read_cell handles them itself.
+ODS_TYPES = {
+    'float': DecimalType(),
+    'date': DateType('%Y-%m-%d'),
+    'boolean': BoolType(),
+    'percentage': PercentageType(),
+    'time': TimeType()
+}
+
+
+class FODSTableSet(TableSet):
+    """
+    A table set over a flat OpenDocument spreadsheet (``.fods``).
+
+    The whole document is read into memory as bytes once; the sheets are
+    then located in that buffer with regular expressions.
+    """
+
+    def __init__(self, fileobj, window=None, **kw):
+        '''Initialize the object.
+
+        :param fileobj: either a file path or a file-like object. A file-like
+        object *must* be opened in binary mode, because the sheet and row
+        patterns are bytes patterns. It does not need to be seekable: it is
+        read exactly once.
+        :param window: passed through unchanged to every FODSRowSet.
+        :param kw: accepted and ignored.
+        '''
+        if hasattr(fileobj, 'read'):
+            self.content = fileobj.read()
+        else:
+            # A path was given: open it ourselves and make sure it is closed.
+            with open(fileobj, 'rb') as handle:
+                self.content = handle.read()
+
+        self.window = window
+
+    def make_tables(self):
+        """
+        Return one FODSRowSet per sheet found in the document.
+
+        Sheets are cut out of the in-memory bytes with a regular expression
+        rather than by parsing the whole document as XML.
+        """
+        namespace_tags = self._get_namespace_tags()
+        return [FODSRowSet(match.group(1), self.window, namespace_tags)
+                for match in ODS_TABLE_MATCH.finditer(self.content)]
+
+    def _get_namespace_tags(self):
+        """
+        Return the ``(opening, closing)`` root tags used to wrap fragments.
+
+        The opening tag is copied from the document itself so that every
+        namespace prefix used inside a row is declared when that row is
+        parsed on its own.
+
+        :raises ValueError: if no root opening tag is found in the content.
+        """
+        match = FODS_NAMESPACES_TAG_MATCH.search(self.content)
+        if match is None:
+            # An explicit error instead of an assert: asserts vanish under -O.
+            raise ValueError(
+                "No root document tag found; is this a flat ODS file?")
+        tag_open = match.group(1)
+        # NOTE(review): assumes the root element is office:document, as in
+        # flat ODF; it must close whatever FODS_NAMESPACES_TAG_MATCH opened.
+        tag_close = b'</office:document>'
+        return tag_open, tag_close
+
+
+class FODSRowSet(RowSet):
+    """Rows of a single sheet taken from a flat ODS document.
+
+    The sheet is held in memory as a bytes fragment, so unlike the CSV row
+    set this is not a streaming operation.
+    """
+
+    def __init__(self, sheet, window=None, namespace_tags=None):
+        """
+        :param sheet: bytes of one sheet element, as cut out by
+        ODS_TABLE_MATCH.
+        :param window: stored on the instance; defaults to 1000 when falsy.
+        :param namespace_tags: optional ``(opening, closing)`` bytes pair
+        used to wrap each row so it can be parsed as a standalone document.
+        """
+        self.sheet = sheet
+
+        # The name is whatever bytes sit inside table:name="..."; it stays
+        # "Unknown" when the attribute is not found.
+        self.name = "Unknown"
+        m = ODS_TABLE_NAME.match(self.sheet)
+        if m:
+            self.name = m.group(1)
+
+        self.window = window or 1000
+
+        # Each row fragment must be wrapped in a root element that declares
+        # the namespaces it uses, otherwise iterparse rejects the prefixes.
+        # The declarations vary with the LibreOffice version that saved the
+        # file, so the caller's tags (taken from the file) are preferred and
+        # the defaults below are only a fallback.
+        if namespace_tags:
+            self.namespace_tags = namespace_tags
+        else:
+            namespaces = {
+                "dc": u"http://purl.org/dc/elements/1.1/",
+                "draw": NS_OPENDOCUMENT_PTTN % u"drawing:1.0",
+                "number": NS_OPENDOCUMENT_PTTN % u"datastyle:1.0",
+                "office": NS_OPENDOCUMENT_PTTN % u"office:1.0",
+                "svg": NS_OPENDOCUMENT_PTTN % u"svg-compatible:1.0",
+                "table": NS_OPENDOCUMENT_PTTN % u"table:1.0",
+                "text": NS_OPENDOCUMENT_PTTN % u"text:1.0",
+                "calcext": NS_CAL_PTTN % u"calcext:1.0",
+            }
+
+            # .items() instead of .iteritems() so this also runs on Python 3.
+            declarations = " ".join(
+                'xmlns:{0}="{1}"'.format(prefix, uri)
+                for prefix, uri in namespaces.items())
+            # NOTE(review): wrapper element name reconstructed; any
+            # well-formed root works because raw() only inspects
+            # table:table-cell elements.
+            ods_header = (
+                u'<?xml version="1.0" encoding="UTF-8"?>'
+                u'<office:document {0}>'
+            ).format(declarations).encode('utf-8')
+            ods_footer = u"</office:document>".encode('utf-8')
+            self.namespace_tags = (ods_header, ods_footer)
+
+        super(FODSRowSet, self).__init__(typed=True)
+
+    def raw(self, sample=False):
+        """Yield every non-blank row of this sheet as a list of cells.
+
+        ``sample`` is accepted but not used: all rows are always produced.
+        """
+        cell_tag = _tag(NS_OPENDOCUMENT_TABLE, TABLE_CELL)
+        repeat_attr = _tag(NS_OPENDOCUMENT_TABLE, COLUMN_REPEAT)
+        header, footer = self.namespace_tags
+
+        for row in ODS_ROW_MATCH.findall(self.sheet):
+            row_data = []
+            empty_row = True
+            fragment = io.BytesIO(header + row + footer)
+
+            for _action, element in etree.iterparse(fragment, ('end',)):
+                if element.tag != cell_tag:
+                    continue
+
+                cell = _read_cell(element)
+                if cell.value != EMPTY_CELL_VALUE:
+                    empty_row = False
+
+                repeat = element.attrib.get(repeat_attr)
+                if repeat:
+                    # NOTE(review): the repeated columns share one Cell
+                    # object; confirm no consumer mutates cells in place.
+                    row_data += [cell] * int(repeat)
+                else:
+                    row_data.append(cell)
+
+            # Rows whose cells are all empty are skipped.
+            if not empty_row:
+                yield row_data
+
+
+def _read_cell(element):
+    """Build a ``Cell`` from a ``table:table-cell`` element.
+
+    The ``office:value-type`` attribute selects the branch:
+
+    * missing    -> empty string cell;
+    * 'string'   -> text taken from the child elements;
+    * 'currency' -> "<value> <currency>" typed as CurrencyType;
+    * otherwise  -> the raw attribute named by ODS_VALUE_TOKEN, typed via
+      ODS_TYPES (StringType for unknown value types).
+    """
+    attrib = element.attrib
+    cell_type = attrib.get(_tag(NS_OPENDOCUMENT_OFFICE, VALUE_TYPE))
+    if cell_type is None:
+        return Cell(EMPTY_CELL_VALUE, type=StringType())
+    if cell_type == 'string':
+        return _read_text_cell(element)
+
+    value_token = ODS_VALUE_TOKEN.get(cell_type, 'value')
+    value = attrib.get(_tag(NS_OPENDOCUMENT_OFFICE, value_token))
+
+    if cell_type == 'currency':
+        currency = attrib.get(_tag(NS_OPENDOCUMENT_OFFICE, 'currency'))
+        # Either attribute may be absent; only join them when both exist so
+        # a missing one no longer raises TypeError on str + None.
+        if value is not None and currency:
+            value = value + ' ' + currency
+        return Cell(value, type=CurrencyType())
+
+    return Cell(value, type=ODS_TYPES.get(cell_type, StringType()))
+
+
+def _read_text_cell(element):
+    """Return a StringType ``Cell`` holding the text of a string cell.
+
+    Each direct child contributes its own leading text (or an empty string
+    when it has none); the pieces are joined with newlines. A cell without
+    children yields the empty string.
+    """
+    # Iterate the element directly: getchildren() is deprecated.
+    # NOTE(review): only child.text is read, so text nested deeper inside a
+    # child (or following a nested element) is not included.
+    cell_value = '\n'.join(
+        child.text if child.text else EMPTY_CELL_VALUE for child in element)
+    return Cell(cell_value, type=StringType())
+
+
+def _tag(namespace, tag):
+    """Return the namespace-qualified name ``{namespace}tag``."""
+    qualified = '{%s}%s'
+    return qualified % (namespace, tag)
diff --git a/test/test_any.py b/test/test_any.py
index 1fbfe78..74a0f59 100644
--- a/test/test_any.py
+++ b/test/test_any.py
@@ -5,7 +5,7 @@
from nose.tools import assert_equal
from nose.plugins.skip import SkipTest
from messytables import (any_tableset, XLSTableSet, ZIPTableSet, PDFTableSet,
- CSVTableSet, ODSTableSet,
+ CSVTableSet, ODSTableSet, FODSTableSet,
ReadError)
suite = [{'filename': 'simple.csv', 'tableset': CSVTableSet},
@@ -13,6 +13,7 @@
{'filename': 'simple.xlsx', 'tableset': XLSTableSet},
{'filename': 'simple.zip', 'tableset': ZIPTableSet},
{'filename': 'simple.ods', 'tableset': ODSTableSet},
+ {'filename': 'simple.fods', 'tableset': FODSTableSet},
{'filename': 'bian-anal-mca-2005-dols-eng-1011-0312-tab3.xlsm',
'tableset': XLSTableSet},
]