diff --git a/src/crowsetta/formats/bbox/audbbox.py b/src/crowsetta/formats/bbox/audbbox.py index f7168c9..aa538a0 100644 --- a/src/crowsetta/formats/bbox/audbbox.py +++ b/src/crowsetta/formats/bbox/audbbox.py @@ -10,7 +10,7 @@ import attr import pandas as pd -import pandera +import pandera.pandas from pandera.typing import Series import crowsetta @@ -97,21 +97,18 @@ def df_to_lines(df: pd.DataFrame) -> list[str]: return lines -class AudBBoxSchema(pandera.DataFrameModel): - """A :class:`pandera.DataFrameModel - - ` that - validates :mod:`pandas` dataframes - loaded from Audacity label tracks - in extended format, exported to txt files - https://manual.audacityteam.org/man/importing_and_exporting_labels.html#Extended_format_with_frequency_ranges +class AudBBoxSchema(pandera.pandas.DataFrameModel): + """A :class:`pandera.pandas.DataFrameModel` that + validates :mod:`pandas` dataframes loaded from Audacity label tracks + in extended format then exported to txt files + https://manual.audacityteam.org/man/importing_and_exporting_labels.html#Extended_format_with_frequency_ranges """ - begin_time_s: Series[float] = pandera.Field(coerce=True) - end_time_s: Series[float] = pandera.Field(coerce=True) - label: Series[pd.StringDtype] = pandera.Field(coerce=True) - low_freq_hz: Series[float] = pandera.Field(coerce=True) - high_freq_hz: Series[float] = pandera.Field(coerce=True) + begin_time_s: Series[float] = pandera.pandas.Field(coerce=True) + end_time_s: Series[float] = pandera.pandas.Field(coerce=True) + label: Series[pd.StringDtype] = pandera.pandas.Field(coerce=True) + low_freq_hz: Series[float] = pandera.pandas.Field(coerce=True) + high_freq_hz: Series[float] = pandera.pandas.Field(coerce=True) class Config: ordered = True diff --git a/src/crowsetta/formats/bbox/raven.py b/src/crowsetta/formats/bbox/raven.py index d01ca5c..7e28fc9 100644 --- a/src/crowsetta/formats/bbox/raven.py +++ b/src/crowsetta/formats/bbox/raven.py @@ -11,26 +11,23 @@ import attr import pandas as pd 
-import pandera +import pandera.pandas from pandera.typing import Series import crowsetta from crowsetta.typing import PathLike -class RavenSchema(pandera.DataFrameModel): - """A :class:`pandera.DataFrameModel - - ` that validates :type:`pandas.DataFrame`s - loaded from a txt file, created by exporting a Selection Table - from Raven. +class RavenSchema(pandera.pandas.DataFrameModel): + """A :class:`pandera.pandas.DataFrameModel` that validates a :type:`pandas.DataFrame` + loaded from a txt file, created by exporting a Selection Table from Raven. """ - begin_time_s: Series[float] = pandera.Field() - end_time_s: Series[float] = pandera.Field() - low_freq_hz: Series[float] = pandera.Field() - high_freq_hz: Series[float] = pandera.Field() - annotation: Series[pd.StringDtype] = pandera.Field(coerce=True) + begin_time_s: Series[float] = pandera.pandas.Field() + end_time_s: Series[float] = pandera.pandas.Field() + low_freq_hz: Series[float] = pandera.pandas.Field() + high_freq_hz: Series[float] = pandera.pandas.Field() + annotation: Series[pd.StringDtype] = pandera.pandas.Field(coerce=True) class Config: # we set strict fo False diff --git a/src/crowsetta/formats/seq/audseq.py b/src/crowsetta/formats/seq/audseq.py index fd2782b..57ee3ba 100644 --- a/src/crowsetta/formats/seq/audseq.py +++ b/src/crowsetta/formats/seq/audseq.py @@ -9,28 +9,24 @@ import attr import numpy as np import pandas as pd -import pandera +import pandera.pandas from pandera.typing import Series import crowsetta from crowsetta.typing import PathLike -class AudSeqSchema(pandera.DataFrameModel): - """A :class:`pandera.DataFrameModel +class AudSeqSchema(pandera.pandas.DataFrameModel): + """A :class:`pandera.pandas.DataFrameModel` that validates a :type:`pandas.DataFrame` + loaded from Audacity Labeltrack annotations exported to txt files in the standard format. - ` - that validates :type:`pandas.DataFrame`s - loaded from Audacity Labeltrack annotations - exported to txt files in the standard format. 
- - The standard format is described here: - https://manual.audacityteam.org/man/importing_and_exporting_labels.html#Standard_.28default.29_format + The standard format is described here: + https://manual.audacityteam.org/man/importing_and_exporting_labels.html#Standard_.28default.29_format """ - start_time: Optional[Series[float]] = pandera.Field() - end_time: Optional[Series[float]] = pandera.Field() - label: Series[pd.StringDtype] = pandera.Field(coerce=True) + start_time: Optional[Series[float]] = pandera.pandas.Field() + end_time: Optional[Series[float]] = pandera.pandas.Field() + label: Series[pd.StringDtype] = pandera.pandas.Field(coerce=True) class Config: ordered = True @@ -42,7 +38,7 @@ class Config: class AudSeq: """Class meant to represent Audacity Labeltrack annotations - exported to txt files in the standard format[1]_. + exported to txt files in the standard format [1]_. The txt file will have 3 tab-separated columns that represent the start time, end time, and labels diff --git a/src/crowsetta/formats/seq/generic.py b/src/crowsetta/formats/seq/generic.py index 4065930..626714d 100644 --- a/src/crowsetta/formats/seq/generic.py +++ b/src/crowsetta/formats/seq/generic.py @@ -22,7 +22,7 @@ import attr import pandas as pd -import pandera +import pandera.pandas from pandera.typing import Series import crowsetta @@ -36,27 +36,24 @@ """ -class GenericSeqSchema(pandera.DataFrameModel): - """A :class: `pandera.DataFrameModel - - ` that validates - :type:`pandas.DataFrame`s - loaded from a csv file in the ``'generic-seq'`` annotation - format. +class GenericSeqSchema(pandera.pandas.DataFrameModel): + """A :class:`pandera.pandas.DataFrameModel` that validates + a :type:`pandas.DataFrame` loaded from a csv file + in the ``'generic-seq'`` annotation format. 
""" - label: Series[pd.StringDtype] = pandera.Field(coerce=True) - onset_s: Optional[Series[float]] = pandera.Field() - offset_s: Optional[Series[float]] = pandera.Field() - onset_sample: Optional[Series[int]] = pandera.Field() - offset_sample: Optional[Series[int]] = pandera.Field() + label: Series[pd.StringDtype] = pandera.pandas.Field(coerce=True) + onset_s: Optional[Series[float]] = pandera.pandas.Field() + offset_s: Optional[Series[float]] = pandera.pandas.Field() + onset_sample: Optional[Series[int]] = pandera.pandas.Field() + offset_sample: Optional[Series[int]] = pandera.pandas.Field() - notated_path: Series[str] = pandera.Field(coerce=True) - annot_path: Series[str] = pandera.Field(coerce=True) - sequence: Series[int] = pandera.Field() - annotation: Series[int] = pandera.Field() + notated_path: Series[str] = pandera.pandas.Field(coerce=True) + annot_path: Series[str] = pandera.pandas.Field(coerce=True) + sequence: Series[int] = pandera.pandas.Field() + annotation: Series[int] = pandera.pandas.Field() - @pandera.dataframe_check(error=ONSET_OFFSET_COLS_ERR) + @pandera.pandas.dataframe_check(error=ONSET_OFFSET_COLS_ERR) def both_onset_s_and_offset_s_if_either(cls, df: pd.DataFrame) -> bool: """check that, if one of {'onset_s', 'offset_s'} column is present, then both are present""" @@ -64,8 +61,8 @@ def both_onset_s_and_offset_s_if_either(cls, df: pd.DataFrame) -> bool: return all([col in df for col in ("onset_s", "offset_s")]) else: return True - - @pandera.dataframe_check(error=ONSET_OFFSET_COLS_ERR) + + @pandera.pandas.dataframe_check(error=ONSET_OFFSET_COLS_ERR) def both_onset_sample_and_offset_sample_if_either(cls, df: pd.DataFrame) -> bool: """check that, if one of {'onset_sample', 'offset_sample'} column is present, then both are present""" @@ -74,7 +71,7 @@ def both_onset_sample_and_offset_sample_if_either(cls, df: pd.DataFrame) -> bool else: return True - @pandera.dataframe_check(error=ONSET_OFFSET_COLS_ERR) + 
@pandera.pandas.dataframe_check(error=ONSET_OFFSET_COLS_ERR) def onset_offset_s_and_ind_are_not_both_missing(cls, df: pd.DataFrame) -> bool: """check that at least one of the on/offset column pairs is present: either {'onset_s', 'offset_s'} or {'onset_sample', 'offset_sample'}""" diff --git a/src/crowsetta/formats/seq/simple.py b/src/crowsetta/formats/seq/simple.py index cf0c471..7725294 100644 --- a/src/crowsetta/formats/seq/simple.py +++ b/src/crowsetta/formats/seq/simple.py @@ -24,16 +24,16 @@ import attr import numpy as np import pandas as pd -import pandera +import pandera.pandas from pandera.typing import Series import crowsetta from crowsetta.typing import PathLike -class SimpleSeqSchema(pandera.DataFrameModel): - """A :class:`pandera.DataFrameModel` - that validates :type:`pandas.DataFrame`s +class SimpleSeqSchema(pandera.pandas.DataFrameModel): + """A :class:`pandera.pandas.DataFrameModel` + that validates a :type:`pandas.DataFrame` loaded from a csv or txt file in a 'simple-seq' format. The :meth:`SimpleSeq.from_file` loads the :type:`pandas.DataFrame` @@ -41,9 +41,9 @@ class SimpleSeqSchema(pandera.DataFrameModel): before validation, e.g., changing column names. 
""" - onset_s: Optional[Series[float]] = pandera.Field() - offset_s: Optional[Series[float]] = pandera.Field() - label: Series[pd.StringDtype] = pandera.Field(coerce=True) + onset_s: Optional[Series[float]] = pandera.pandas.Field() + offset_s: Optional[Series[float]] = pandera.pandas.Field() + label: Series[pd.StringDtype] = pandera.pandas.Field(coerce=True) class Config: ordered = True diff --git a/src/crowsetta/formats/seq/timit.py b/src/crowsetta/formats/seq/timit.py index 68383c1..92ec37e 100644 --- a/src/crowsetta/formats/seq/timit.py +++ b/src/crowsetta/formats/seq/timit.py @@ -12,7 +12,7 @@ import attr import numpy as np import pandas as pd -import pandera +import pandera.pandas import soundfile from pandera.typing import Series @@ -20,21 +20,19 @@ from crowsetta.typing import PathLike -class TimitTranscriptSchema(pandera.DataFrameModel): - """A :class:`pandera.DataFrameModel +class TimitTranscriptSchema(pandera.pandas.DataFrameModel): + """A :class:`pandera.pandas.DataFrameModel` that validates a :type:`pandas.DataFrame` + loaded from a phn or wrd file in the TIMIT [1]_ transcription format. - ` that validates :type:`pandas.DataFrame`s - loaded from a phn or wrd file in the TIMIT[1]_ transcription format. - - References - ---------- - .. [1] Garofolo, John S., et al. TIMIT Acoustic-Phonetic Continuous Speech Corpus LDC93S1. - Web Download. Philadelphia: Linguistic Data Consortium, 1993. + References + ---------- + .. [1] Garofolo, John S., et al. TIMIT Acoustic-Phonetic Continuous Speech Corpus LDC93S1. + Web Download. Philadelphia: Linguistic Data Consortium, 1993. 
""" - begin_sample: Optional[Series[int]] = pandera.Field() - end_sample: Optional[Series[int]] = pandera.Field() - text: Series[pd.StringDtype] = pandera.Field(coerce=True) + begin_sample: Optional[Series[int]] = pandera.pandas.Field() + end_sample: Optional[Series[int]] = pandera.pandas.Field() + text: Series[pd.StringDtype] = pandera.pandas.Field(coerce=True) class Config: ordered = True diff --git a/tests/test_formats/test_seq/test_simple.py b/tests/test_formats/test_seq/test_simple.py index ca3c07f..2533b8a 100644 --- a/tests/test_formats/test_seq/test_simple.py +++ b/tests/test_formats/test_seq/test_simple.py @@ -1,6 +1,5 @@ import filecmp import inspect -import tempfile import numpy as np import pandas as pd