From 1dd44e91e347398e2e1f4514c5812f321449b2e5 Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Tue, 8 Aug 2017 00:15:34 -0700 Subject: [PATCH] Expose *_with_reads_parser method overloads to Python API --- include/khmer/_cpy_hashtable.hh | 22 +++- src/khmer/_cpy_hashtable.cc | 180 ++++++++++++++++++++++++++------ tests/test_banding.py | 3 +- tests/test_counttable.py | 14 +++ 4 files changed, 186 insertions(+), 33 deletions(-) diff --git a/include/khmer/_cpy_hashtable.hh b/include/khmer/_cpy_hashtable.hh index 51f33382f5..f16f0ecf06 100644 --- a/include/khmer/_cpy_hashtable.hh +++ b/include/khmer/_cpy_hashtable.hh @@ -48,21 +48,39 @@ PyObject * hashtable_consume_seqfile(khmer_KHashtable_Object * me, PyObject * args); +PyObject * +hashtable_consume_seqfile_with_reads_parser(khmer_KHashtable_Object * me, + PyObject * args); + + PyObject * hashtable_consume_seqfile_banding(khmer_KHashtable_Object * me, PyObject * args); +PyObject * +hashtable_consume_seqfile_banding_with_reads_parser( + khmer_KHashtable_Object * me, PyObject * args +); + + PyObject * hashtable_consume_seqfile_with_mask(khmer_KHashtable_Object * me, PyObject * args); +PyObject * +hashtable_consume_seqfile_with_mask_with_reads_parser( + khmer_KHashtable_Object * me, PyObject * args +); + + PyObject * hashtable_consume_seqfile_banding_with_mask(khmer_KHashtable_Object * me, PyObject * args); PyObject * -hashtable_consume_seqfile_with_reads_parser(khmer_KHashtable_Object * me, - PyObject * args); +hashtable_consume_seqfile_banding_with_mask_with_reads_parser( + khmer_KHashtable_Object * me, PyObject * args +); PyObject * diff --git a/src/khmer/_cpy_hashtable.cc b/src/khmer/_cpy_hashtable.cc index 7f7555d443..6d012ee861 100644 --- a/src/khmer/_cpy_hashtable.cc +++ b/src/khmer/_cpy_hashtable.cc @@ -109,25 +109,43 @@ PyMethodDef khmer_hashtable_methods[] = { "Increment the counts of all the k-mers in the sequences in the " "given file" }, + { + "consume_seqfile_with_reads_parser", + (PyCFunction)hashtable_consume_seqfile_with_reads_parser, METH_VARARGS, + "Count all k-mers retrieved with this reads parser object." + }, { "consume_seqfile_banding", (PyCFunction)hashtable_consume_seqfile_banding, METH_VARARGS, "Consume sequences in k-mer banding mode" }, + { + "consume_seqfile_banding_with_reads_parser", + (PyCFunction)hashtable_consume_seqfile_banding_with_reads_parser, + METH_VARARGS, + "Consume sequences in k-mer banding mode" + }, { "consume_seqfile_with_mask", (PyCFunction)hashtable_consume_seqfile_with_mask, METH_VARARGS, "Consume any k-mers not present in the provided mask" }, + { + "consume_seqfile_with_mask_with_reads_parser", + (PyCFunction)hashtable_consume_seqfile_with_mask_with_reads_parser, + METH_VARARGS, + "Consume any k-mers not present in the provided mask" + }, { "consume_seqfile_banding_with_mask", (PyCFunction)hashtable_consume_seqfile_banding_with_mask, METH_VARARGS, "Consume sequences in k-mer banding mode, with a mask" }, { - "consume_seqfile_with_reads_parser", - (PyCFunction)hashtable_consume_seqfile_with_reads_parser, METH_VARARGS, - "Count all k-mers retrieved with this reads parser object." + "consume_seqfile_banding_with_mask_with_reads_parser", + (PyCFunction)hashtable_consume_seqfile_banding_with_mask_with_reads_parser, + METH_VARARGS, + "Consume sequences in k-mer banding mode, with a mask" }, { "get", @@ -369,6 +387,52 @@ hashtable_consume_seqfile(khmer_KHashtable_Object * me, PyObject * args) return Py_BuildValue("IK", total_reads, n_consumed); } +PyObject * +hashtable_consume_seqfile_with_reads_parser(khmer_KHashtable_Object * me, + PyObject * args) +{ + Hashtable * hashtable = me->hashtable; + + PyObject * rparser_obj = NULL; + + if (!PyArg_ParseTuple(args, "O", &rparser_obj)) { + return NULL; + } + + FastxParserPtr& rparser = _PyObject_to_khmer_ReadParser( rparser_obj ); + + // call the C++ function, and trap signals => Python + unsigned long long n_consumed = 0; + unsigned int total_reads = 0; + const char *value_exception = NULL; + const char *file_exception = NULL; + std::string exc_string; + + Py_BEGIN_ALLOW_THREADS + try { + hashtable->consume_seqfile(rparser, total_reads, n_consumed); + } catch (oxli_file_exception &exc) { + exc_string = exc.what(); + file_exception = exc_string.c_str(); + } catch (oxli_value_exception &exc) { + exc_string = exc.what(); + value_exception = exc_string.c_str(); + } + Py_END_ALLOW_THREADS + + if (file_exception != NULL) { + PyErr_SetString(PyExc_OSError, file_exception); + return NULL; + } + if (value_exception != NULL) { + PyErr_SetString(PyExc_ValueError, value_exception); + return NULL; + } + + rparser->close(); + return Py_BuildValue("IK", total_reads, n_consumed); +} + PyObject * hashtable_consume_seqfile_banding(khmer_KHashtable_Object * me, PyObject * args) { @@ -398,6 +462,38 @@ hashtable_consume_seqfile_banding(khmer_KHashtable_Object * me, PyObject * args) return Py_BuildValue("IK", total_reads, n_consumed); } +PyObject * +hashtable_consume_seqfile_banding_with_reads_parser(khmer_KHashtable_Object * me, PyObject * args) +{ + Hashtable * hashtable = me->hashtable; + + PyObject * rparser_obj = NULL; + unsigned int num_bands; + unsigned int band; + + if (!PyArg_ParseTuple(args, "OII", &rparser_obj, &num_bands, &band)) { + return NULL; + } + + FastxParserPtr& rparser = _PyObject_to_khmer_ReadParser( rparser_obj ); + + // call the C++ function, and trap signals => Python + unsigned long long n_consumed = 0; + unsigned int total_reads = 0; + try { + hashtable->consume_seqfile_banding(rparser, num_bands, band, total_reads, n_consumed); + } catch (oxli_file_exception &exc) { + PyErr_SetString(PyExc_OSError, exc.what()); + return NULL; + } catch (oxli_value_exception &exc) { + PyErr_SetString(PyExc_ValueError, exc.what()); + return NULL; + } + + rparser->close(); + return Py_BuildValue("IK", total_reads, n_consumed); +} + PyObject * hashtable_consume_seqfile_with_mask(khmer_KHashtable_Object * me, PyObject * args) { @@ -429,6 +525,40 @@ hashtable_consume_seqfile_with_mask(khmer_KHashtable_Object * me, PyObject * arg return Py_BuildValue("IK", total_reads, n_consumed); } +PyObject * +hashtable_consume_seqfile_with_mask_with_reads_parser(khmer_KHashtable_Object * me, PyObject * args) +{ + Hashtable * hashtable = me->hashtable; + + PyObject * rparser_obj = NULL; + khmer_KHashtable_Object *mask = NULL; + unsigned int threshold = 0; + + if (!PyArg_ParseTuple(args, "OO|I", &rparser_obj, &mask, &threshold)) { + return NULL; + } + + FastxParserPtr& rparser = _PyObject_to_khmer_ReadParser( rparser_obj ); + + // call the C++ function, and trap signals => Python + unsigned long long n_consumed = 0; + unsigned int total_reads = 0; + try { + hashtable->consume_seqfile_with_mask( + rparser, mask->hashtable, threshold, total_reads, n_consumed + ); + } catch (oxli_file_exception &exc) { + PyErr_SetString(PyExc_OSError, exc.what()); + return NULL; + } catch (oxli_value_exception &exc) { + PyErr_SetString(PyExc_ValueError, exc.what()); + return NULL; + } + + rparser->close(); + return Py_BuildValue("IK", total_reads, n_consumed); +} + PyObject * hashtable_consume_seqfile_banding_with_mask(khmer_KHashtable_Object * me, PyObject * args) { @@ -464,52 +594,42 @@ hashtable_consume_seqfile_banding_with_mask(khmer_KHashtable_Object * me, PyObje } PyObject * -hashtable_consume_seqfile_with_reads_parser(khmer_KHashtable_Object * me, - PyObject * args) +hashtable_consume_seqfile_banding_with_mask_with_reads_parser(khmer_KHashtable_Object * me, PyObject * args) { - Hashtable * hashtable = me->hashtable; + Hashtable * hashtable = me->hashtable; PyObject * rparser_obj = NULL; + unsigned int num_bands; + unsigned int band; + khmer_KHashtable_Object *mask = NULL; + unsigned int threshold = 0; - if (!PyArg_ParseTuple(args, "O", &rparser_obj)) { + if (!PyArg_ParseTuple(args, "OIIO|I", &rparser_obj, &num_bands, &band, &mask, &threshold)) { return NULL; } FastxParserPtr& rparser = _PyObject_to_khmer_ReadParser( rparser_obj ); // call the C++ function, and trap signals => Python - unsigned long long n_consumed = 0; - unsigned int total_reads = 0; - const char *value_exception = NULL; - const char *file_exception = NULL; - std::string exc_string; - - Py_BEGIN_ALLOW_THREADS + unsigned long long n_consumed = 0; + unsigned int total_reads = 0; try { - hashtable->consume_seqfile(rparser, total_reads, n_consumed); + hashtable->consume_seqfile_banding_with_mask( + rparser, num_bands, band, mask->hashtable, threshold, total_reads, + n_consumed + ); } catch (oxli_file_exception &exc) { - exc_string = exc.what(); - file_exception = exc_string.c_str(); - } catch (oxli_value_exception &exc) { - exc_string = exc.what(); - value_exception = exc_string.c_str(); - } - Py_END_ALLOW_THREADS - - if (file_exception != NULL) { - PyErr_SetString(PyExc_OSError, file_exception); + PyErr_SetString(PyExc_OSError, exc.what()); return NULL; - } - if (value_exception != NULL) { - PyErr_SetString(PyExc_ValueError, value_exception); + } catch (oxli_value_exception &exc) { + PyErr_SetString(PyExc_ValueError, exc.what()); return NULL; } + rparser->close(); return Py_BuildValue("IK", total_reads, n_consumed); } - - PyObject * hashtable_consume(khmer_KHashtable_Object * me, PyObject * args) { diff --git a/tests/test_banding.py b/tests/test_banding.py index cbd163e07a..a1cccef8c8 100644 --- a/tests/test_banding.py +++ b/tests/test_banding.py @@ -110,7 +110,8 @@ def test_banding_to_disk(ksize, memory, numbands): ct = khmer.Counttable(ksize, memory / 4, 4) for band in range(numbands): - ct.consume_seqfile_banding(infile, numbands, band) + parser = khmer.ReadParser(infile) + ct.consume_seqfile_banding_with_reads_parser(parser, numbands, band) ct.save(path2) fpr = khmer.calc_expected_collisions(ct) print('FPR', fpr) diff --git a/tests/test_counttable.py b/tests/test_counttable.py index 15b7808a0b..a9f3f12e9c 100644 --- a/tests/test_counttable.py +++ b/tests/test_counttable.py @@ -111,6 +111,12 @@ def test_consume_with_mask(): assert ct.get('TTTGAGAAAAAAG') == 1 assert ct.get('TTGAGAAAAAAGT') == 1 + ct = khmer.Counttable(13, 1e3, 4) + parser = khmer.ReadParser(infile) + nr, nk = ct.consume_seqfile_with_mask_with_reads_parser(parser, mask) + assert nr == 1 + assert nk == 3 + def test_consume_banding_with_mask(): """ @@ -137,6 +143,14 @@ def test_consume_banding_with_mask(): assert ct.get('TTTGAGAAAAAAG') == 0 # out of band assert ct.get('TTGAGAAAAAAGT') == 1 + ct = khmer.Counttable(13, 1e3, 4) + parser = khmer.ReadParser(infile) + nr, nk = ct.consume_seqfile_banding_with_mask_with_reads_parser( + parser, 4, 1, mask + ) + assert nr == 1 + assert nk == 1 + def test_consume_with_mask_threshold(): """