Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions include/khmer/_cpy_hashtable.hh
Original file line number Diff line number Diff line change
Expand Up @@ -48,21 +48,39 @@ PyObject *
hashtable_consume_seqfile(khmer_KHashtable_Object * me, PyObject * args);


// Python method: consume the reads supplied by an existing read parser object.
// Python args: (parser). Returns a (total_reads, n_consumed) tuple, or NULL
// with OSError / ValueError set when the underlying consume call throws.
PyObject *
hashtable_consume_seqfile_with_reads_parser(khmer_KHashtable_Object * me,
PyObject * args);


PyObject *
hashtable_consume_seqfile_banding(khmer_KHashtable_Object * me, PyObject * args);


// Python method: banding-mode consume that reads from an existing read parser
// object. Python args: (parser, num_bands, band). Returns a
// (total_reads, n_consumed) tuple, or NULL with OSError / ValueError set when
// the underlying consume call throws.
PyObject *
hashtable_consume_seqfile_banding_with_reads_parser(
khmer_KHashtable_Object * me, PyObject * args
);


PyObject *
hashtable_consume_seqfile_with_mask(khmer_KHashtable_Object * me, PyObject * args);


// Python method: masked consume that reads from an existing read parser
// object. Python args: (parser, mask[, threshold]); threshold is 0 when
// omitted. Returns a (total_reads, n_consumed) tuple, or NULL with
// OSError / ValueError set when the underlying consume call throws.
PyObject *
hashtable_consume_seqfile_with_mask_with_reads_parser(
khmer_KHashtable_Object * me, PyObject * args
);


PyObject *
hashtable_consume_seqfile_banding_with_mask(khmer_KHashtable_Object * me, PyObject * args);


PyObject *
hashtable_consume_seqfile_with_reads_parser(khmer_KHashtable_Object * me,
PyObject * args);

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved this function up for consistency's sake: for each overloaded method, the version that accepts a string is first, and the version that accepts a parser is second.

hashtable_consume_seqfile_banding_with_mask_with_reads_parser(
khmer_KHashtable_Object * me, PyObject * args
);


PyObject *
Expand Down
180 changes: 150 additions & 30 deletions src/khmer/_cpy_hashtable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -109,25 +109,43 @@ PyMethodDef khmer_hashtable_methods[] = {
"Increment the counts of all the k-mers in the sequences in the "
"given file"
},
{
"consume_seqfile_with_reads_parser",
(PyCFunction)hashtable_consume_seqfile_with_reads_parser, METH_VARARGS,
"Count all k-mers retrieved with this reads parser object."
},
{
"consume_seqfile_banding",
(PyCFunction)hashtable_consume_seqfile_banding, METH_VARARGS,
"Consume sequences in k-mer banding mode"
},
{
"consume_seqfile_banding_with_reads_parser",
(PyCFunction)hashtable_consume_seqfile_banding_with_reads_parser,
METH_VARARGS,
"Consume sequences in k-mer banding mode"
},
{
"consume_seqfile_with_mask",
(PyCFunction)hashtable_consume_seqfile_with_mask, METH_VARARGS,
"Consume any k-mers not present in the provided mask"
},
{
"consume_seqfile_with_mask_with_reads_parser",
(PyCFunction)hashtable_consume_seqfile_with_mask_with_reads_parser,
METH_VARARGS,
"Consume any k-mers not present in the provided mask"
},
{
"consume_seqfile_banding_with_mask",
(PyCFunction)hashtable_consume_seqfile_banding_with_mask, METH_VARARGS,
"Consume sequences in k-mer banding mode, with a mask"
},
{
"consume_seqfile_with_reads_parser",
(PyCFunction)hashtable_consume_seqfile_with_reads_parser, METH_VARARGS,
"Count all k-mers retrieved with this reads parser object."
"consume_seqfile_banding_with_mask_with_reads_parser",
(PyCFunction)hashtable_consume_seqfile_banding_with_mask_with_reads_parser,
METH_VARARGS,
"Consume sequences in k-mer banding mode, with a mask"
},
{
"get",
Expand Down Expand Up @@ -369,6 +387,52 @@ hashtable_consume_seqfile(khmer_KHashtable_Object * me, PyObject * args)
return Py_BuildValue("IK", total_reads, n_consumed);
}

PyObject *
hashtable_consume_seqfile_with_reads_parser(khmer_KHashtable_Object * me,
        PyObject * args)
{
    // Python binding: feed every read supplied by an existing read parser
    // object into the table and report (reads processed, k-mers consumed).
    PyObject * parser_arg = NULL;
    if (!PyArg_ParseTuple(args, "O", &parser_arg)) {
        return NULL;
    }

    FastxParserPtr& parser = _PyObject_to_khmer_ReadParser(parser_arg);
    Hashtable * table = me->hashtable;

    unsigned int reads_seen = 0;
    unsigned long long kmers_consumed = 0;

    // The GIL is released around the consume call, so the handlers below only
    // record the failure; the Python exception is set once the GIL is held
    // again.
    bool file_failure = false;
    bool value_failure = false;
    std::string failure_text;

    Py_BEGIN_ALLOW_THREADS
    try {
        table->consume_seqfile<FastxReader>(parser, reads_seen, kmers_consumed);
    } catch (oxli_file_exception &err) {
        failure_text = err.what();
        file_failure = true;
    } catch (oxli_value_exception &err) {
        failure_text = err.what();
        value_failure = true;
    }
    Py_END_ALLOW_THREADS

    if (file_failure) {
        PyErr_SetString(PyExc_OSError, failure_text.c_str());
        return NULL;
    }
    if (value_failure) {
        PyErr_SetString(PyExc_ValueError, failure_text.c_str());
        return NULL;
    }

    // Only reached on success: the parser is closed before returning.
    parser->close();
    return Py_BuildValue("IK", reads_seen, kmers_consumed);
}

PyObject *
hashtable_consume_seqfile_banding(khmer_KHashtable_Object * me, PyObject * args)
{
Expand Down Expand Up @@ -398,6 +462,38 @@ hashtable_consume_seqfile_banding(khmer_KHashtable_Object * me, PyObject * args)
return Py_BuildValue("IK", total_reads, n_consumed);
}

PyObject *
hashtable_consume_seqfile_banding_with_reads_parser(khmer_KHashtable_Object * me, PyObject * args)
{
    // Python binding: banding-mode consume driven by an existing read parser
    // object. Expects (parser, num_bands, band) and returns
    // (reads processed, k-mers consumed).
    PyObject * parser_arg = NULL;
    unsigned int band_count;
    unsigned int band_index;
    if (!PyArg_ParseTuple(args, "OII", &parser_arg, &band_count, &band_index)) {
        return NULL;
    }

    FastxParserPtr& parser = _PyObject_to_khmer_ReadParser(parser_arg);
    Hashtable * table = me->hashtable;

    unsigned int reads_seen = 0;
    unsigned long long kmers_consumed = 0;

    // Translate the C++ exceptions into their Python counterparts.
    try {
        table->consume_seqfile_banding<FastxReader>(
            parser, band_count, band_index, reads_seen, kmers_consumed
        );
    } catch (oxli_file_exception &err) {
        PyErr_SetString(PyExc_OSError, err.what());
        return NULL;
    } catch (oxli_value_exception &err) {
        PyErr_SetString(PyExc_ValueError, err.what());
        return NULL;
    }

    // Only reached on success: the parser is closed before returning.
    parser->close();
    return Py_BuildValue("IK", reads_seen, kmers_consumed);
}

PyObject *
hashtable_consume_seqfile_with_mask(khmer_KHashtable_Object * me, PyObject * args)
{
Expand Down Expand Up @@ -429,6 +525,40 @@ hashtable_consume_seqfile_with_mask(khmer_KHashtable_Object * me, PyObject * arg
return Py_BuildValue("IK", total_reads, n_consumed);
}

PyObject *
hashtable_consume_seqfile_with_mask_with_reads_parser(khmer_KHashtable_Object * me, PyObject * args)
{
    // Python binding: masked consume driven by an existing read parser
    // object. Expects (parser, mask[, threshold]); threshold stays 0 when it
    // is not supplied. Returns (reads processed, k-mers consumed).
    PyObject * parser_arg = NULL;
    khmer_KHashtable_Object * mask_arg = NULL;
    unsigned int mask_threshold = 0;
    if (!PyArg_ParseTuple(args, "OO|I", &parser_arg, &mask_arg, &mask_threshold)) {
        return NULL;
    }

    // NOTE(review): the "O" format does not type-check mask_arg before its
    // hashtable member is read below — confirm callers always pass a table.
    FastxParserPtr& parser = _PyObject_to_khmer_ReadParser(parser_arg);
    Hashtable * table = me->hashtable;

    unsigned int reads_seen = 0;
    unsigned long long kmers_consumed = 0;

    // Translate the C++ exceptions into their Python counterparts.
    try {
        table->consume_seqfile_with_mask<FastxReader>(
            parser, mask_arg->hashtable, mask_threshold, reads_seen,
            kmers_consumed
        );
    } catch (oxli_file_exception &err) {
        PyErr_SetString(PyExc_OSError, err.what());
        return NULL;
    } catch (oxli_value_exception &err) {
        PyErr_SetString(PyExc_ValueError, err.what());
        return NULL;
    }

    // Only reached on success: the parser is closed before returning.
    parser->close();
    return Py_BuildValue("IK", reads_seen, kmers_consumed);
}

PyObject *
hashtable_consume_seqfile_banding_with_mask(khmer_KHashtable_Object * me, PyObject * args)
{
Expand Down Expand Up @@ -464,52 +594,42 @@ hashtable_consume_seqfile_banding_with_mask(khmer_KHashtable_Object * me, PyObje
}

PyObject *
hashtable_consume_seqfile_with_reads_parser(khmer_KHashtable_Object * me,
PyObject * args)
hashtable_consume_seqfile_banding_with_mask_with_reads_parser(khmer_KHashtable_Object * me, PyObject * args)

@standage standage Aug 8, 2017

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like git is struggling with the similarity between all of the variants of this function, combined with the fact that I cut and pasted this function further up, again for consistency. I promise the CPython code changes are a lot more boring than this diff would suggest. :-)

{
Hashtable * hashtable = me->hashtable;
Hashtable * hashtable = me->hashtable;

PyObject * rparser_obj = NULL;
unsigned int num_bands;
unsigned int band;
khmer_KHashtable_Object *mask = NULL;
unsigned int threshold = 0;

if (!PyArg_ParseTuple(args, "O", &rparser_obj)) {
if (!PyArg_ParseTuple(args, "OIIO|I", &rparser_obj, &num_bands, &band, &mask, &threshold)) {
return NULL;
}

FastxParserPtr& rparser = _PyObject_to_khmer_ReadParser( rparser_obj );

// call the C++ function, and trap signals => Python
unsigned long long n_consumed = 0;
unsigned int total_reads = 0;
const char *value_exception = NULL;
const char *file_exception = NULL;
std::string exc_string;

Py_BEGIN_ALLOW_THREADS
unsigned long long n_consumed = 0;
unsigned int total_reads = 0;
try {
hashtable->consume_seqfile<FastxReader>(rparser, total_reads, n_consumed);
hashtable->consume_seqfile_banding_with_mask<FastxReader>(
rparser, num_bands, band, mask->hashtable, threshold, total_reads,
n_consumed
);
} catch (oxli_file_exception &exc) {
exc_string = exc.what();
file_exception = exc_string.c_str();
} catch (oxli_value_exception &exc) {
exc_string = exc.what();
value_exception = exc_string.c_str();
}
Py_END_ALLOW_THREADS

if (file_exception != NULL) {
PyErr_SetString(PyExc_OSError, file_exception);
PyErr_SetString(PyExc_OSError, exc.what());
return NULL;
}
if (value_exception != NULL) {
PyErr_SetString(PyExc_ValueError, value_exception);
} catch (oxli_value_exception &exc) {
PyErr_SetString(PyExc_ValueError, exc.what());
return NULL;
}

rparser->close();
return Py_BuildValue("IK", total_reads, n_consumed);
}



PyObject *
hashtable_consume(khmer_KHashtable_Object * me, PyObject * args)
{
Expand Down
3 changes: 2 additions & 1 deletion tests/test_banding.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@ def test_banding_to_disk(ksize, memory, numbands):

ct = khmer.Counttable(ksize, memory / 4, 4)
for band in range(numbands):
ct.consume_seqfile_banding(infile, numbands, band)
parser = khmer.ReadParser(infile)
ct.consume_seqfile_banding_with_reads_parser(parser, numbands, band)
ct.save(path2)
fpr = khmer.calc_expected_collisions(ct)
print('FPR', fpr)
Expand Down
14 changes: 14 additions & 0 deletions tests/test_counttable.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,12 @@ def test_consume_with_mask():
assert ct.get('TTTGAGAAAAAAG') == 1
assert ct.get('TTGAGAAAAAAGT') == 1

ct = khmer.Counttable(13, 1e3, 4)
parser = khmer.ReadParser(infile)
nr, nk = ct.consume_seqfile_with_mask_with_reads_parser(parser, mask)
assert nr == 1
assert nk == 3


def test_consume_banding_with_mask():
"""
Expand All @@ -137,6 +143,14 @@ def test_consume_banding_with_mask():
assert ct.get('TTTGAGAAAAAAG') == 0 # out of band
assert ct.get('TTGAGAAAAAAGT') == 1

ct = khmer.Counttable(13, 1e3, 4)
parser = khmer.ReadParser(infile)
nr, nk = ct.consume_seqfile_banding_with_mask_with_reads_parser(
parser, 4, 1, mask
)
assert nr == 1
assert nk == 1


def test_consume_with_mask_threshold():
"""
Expand Down