Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 7 additions & 11 deletions messytables/commas.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from itertools import islice
from ilines import ilines
import csv
import codecs
Expand Down Expand Up @@ -102,19 +103,15 @@ def __init__(self, name, fileobj, delimiter=None, quotechar=None,
self.name = name
seekable_fileobj = messytables.seekable_stream(fileobj)
self.fileobj = UTF8Recoder(seekable_fileobj, encoding)
self.lines = ilines(self.fileobj)
self.lines = list(ilines(self.fileobj))
self._sample = []
self.delimiter = delimiter
self.quotechar = quotechar
self.window = window or 1000
self.doublequote = doublequote
self.lineterminator = lineterminator
self.skipinitialspace = skipinitialspace
try:
for i in xrange(self.window):
self._sample.append(self.lines.next())
except StopIteration:
pass
self._sample = list(islice(self.lines, self.window))
super(CSVRowSet, self).__init__()

@property
Expand Down Expand Up @@ -148,11 +145,10 @@ def _overrides(self):

def raw(self, sample=False):
def rows():
for line in self._sample:
yield line
if not sample:
for line in self.lines:
yield line
if sample:
return iter(self._sample)
else:
return iter(self.lines)

# Fix the maximum field size to something a little larger
csv.field_size_limit(256000)
Expand Down
4 changes: 2 additions & 2 deletions messytables/headers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from collections import defaultdict
from itertools import izip_longest
from itertools import izip_longest, islice

from messytables.core import Cell

Expand All @@ -26,7 +26,7 @@ def headers_guess(rows, tolerance=1):
The return value is a tuple of the offset of the header row
and the names of the columns.
"""
rows = list(rows)
rows = list(islice(rows, 1000))
modal = column_count_modal(rows)
for i, row in enumerate(rows):
length = len([c for c in row if not c.empty])
Expand Down
4 changes: 2 additions & 2 deletions messytables/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ def type_guess(rows, types=TYPES, strict=False):
guesses.append(typesdict)
at_least_one_value.append(False)
for ci, cell in enumerate(row):
if not cell.value:
if cell.value is None or cell.value == '':
continue
at_least_one_value[ci] = True
for type in guesses[ci].keys():
Expand All @@ -239,7 +239,7 @@ def type_guess(rows, types=TYPES, strict=False):
for i, cell in enumerate(row):
# add string guess so that we have at least one guess
guesses[i][StringType()] = guesses[i].get(StringType(), 0)
if not cell.value:
if cell.value is None or cell.value == '':
continue
for type in type_instances:
if type.test(cell.value):
Expand Down