diff --git a/src/tally/commands/inspect.py b/src/tally/commands/inspect.py index 9e98932..5218cd9 100644 --- a/src/tally/commands/inspect.py +++ b/src/tally/commands/inspect.py @@ -154,7 +154,9 @@ def cmd_inspect(args): if spec.delimiter: print(f" - Delimiter: {repr(spec.delimiter)}") - # Build suggested format string + analysis = _analyze_amount_column_detailed(filepath, spec.amount_column, has_header=True, dialect=dialect) + + # Build suggested format string template max_col = max(spec.date_column, spec.description_column, spec.amount_column) cols = [] @@ -164,24 +166,23 @@ def cmd_inspect(args): elif i == spec.description_column: cols.append('{description}') elif i == spec.amount_column: - cols.append('{amount}') + cols.append('') else: cols.append('{_}') format_str = ', '.join(cols) - print(f"\n Suggested format string:") + print(f"\n Suggested format string template:") print(f' format: "{format_str}"') if spec.delimiter: print(f' delimiter: "{spec.delimiter}"') # Analyze amount patterns - detailed analysis with both signs - analysis = _analyze_amount_column_detailed(filepath, spec.amount_column, has_header=True, dialect=dialect) if analysis: print("\n" + "=" * 70) print("Amount Distribution:") print("-" * 70) - print(f" {analysis['positive_count']} positive amounts, totaling ${analysis['positive_total']:,.2f}") - print(f" {analysis['negative_count']} negative amounts, totaling ${analysis['negative_total']:,.2f}") + print(" " + _format_amount_distribution_line(analysis['positive_count'], 'positive', analysis['positive_total'])) + print(" " + _format_amount_distribution_line(analysis['negative_count'], 'negative', analysis['negative_total'])) # Show format observations if analysis['format_observations']: @@ -203,11 +204,18 @@ def cmd_inspect(args): truncated = desc[:45] + '...' if len(desc) > 45 else desc print(f" -${abs(amt):,.2f} {truncated}") - # Show amount modifier options (not recommendations) - print("\n Amount modifiers available:") - print(f" {{amount}} - use values as-is") - print(f" {{-amount}} - negate (flip sign)") - print(f" {{+amount}} - absolute value") + print("\n Sign observations:") + for line in _describe_amount_sign_observations(analysis): + print(f" - {line}") + + # Show amount modifier options with context + print("\n How to use amount tokens:") + print(f" {{amount}} - use amounts as they appear in the CSV") + print(f" {{-amount}} - flip the sign from the CSV") + print(f" {{+amount}} - make all amounts positive") + + for line in _describe_amount_token_usage(analysis): + print(f" - {line}") # Detect currency symbol from amount column currency_symbol = _detect_currency_symbol(filepath, spec.amount_column, has_header=True, dialect=dialect) @@ -317,141 +325,39 @@ def _detect_file_format(filepath): return result -def _analyze_amount_patterns(filepath, amount_col, has_header=True, delimiter=None, max_rows=1000): - """ - Analyze amount column patterns to help users understand their data's sign convention. +def _describe_amount_sign_observations(analysis): + """Describe what the sampled amount signs look like.""" + if not analysis: + return ["No non-zero amounts were found in the sampled rows."] - Returns dict with: - - positive_count: number of positive amounts - - negative_count: number of negative amounts - - positive_total: sum of positive amounts - - negative_total: sum of negative amounts (as positive number) - - sign_convention: 'expenses_positive' or 'expenses_negative' - - suggest_negate: True if user should use {-amount} to normalize - - sample_credits: list of (description, amount) for likely transfers/income - """ - import re as re_mod + positive_count = analysis['positive_count'] + negative_count = analysis['negative_count'] + total_count = positive_count + negative_count + if total_count == 0: + return ["No non-zero amounts were found in the sampled rows."] - positive_count = 0 - negative_count = 0 - positive_total = 0.0 - negative_total = 0.0 - sample_credits = [] # (description, amount) tuples + return [ + f"Observed {positive_count} positive and {negative_count} negative non-zero amounts in the sampled rows.", + ] - def parse_amount(val): - """Parse amount string to float, handling currency symbols and parentheses.""" - if not val: - return None - val = val.strip() - # Remove currency symbols, commas - val = re_mod.sub(r'[$€£¥,]', '', val) - # Handle parentheses as negative - if val.startswith('(') and val.endswith(')'): - val = '-' + val[1:-1] - try: - return float(val) - except ValueError: - return None - try: - with open(filepath, 'r', encoding='utf-8') as f: - if delimiter and delimiter.startswith('regex:'): - # Regex-based parsing - pattern = re_mod.compile(delimiter[6:]) - for i, line in enumerate(f): - if has_header and i == 0: - continue - if i >= max_rows: - break - line = line.strip() - if not line: - continue - match = pattern.match(line) - if match: - groups = match.groups() - if amount_col < len(groups): - amount = parse_amount(groups[amount_col]) - if amount is not None: - desc = groups[1] if len(groups) > 1 else '' - if amount >= 0: - positive_count += 1 - positive_total += amount - else: - negative_count += 1 - negative_total += abs(amount) - if len(sample_credits) < 10: - sample_credits.append((desc.strip(), amount)) - else: - # Standard CSV - reader = csv.reader(f) - if has_header: - headers = next(reader, None) - desc_col = 1 # default - for idx, h in enumerate(headers or []): - hl = h.lower() - if 'desc' in hl or 'merchant' in hl or 'payee' in hl or 'name' in hl: - desc_col = idx - break - else: - desc_col = 1 - - for i, row in enumerate(reader): - if i >= max_rows: - break - if amount_col < len(row): - amount = parse_amount(row[amount_col]) - if amount is not None: - desc = row[desc_col] if desc_col < len(row) else '' - if amount >= 0: - positive_count += 1 - positive_total += amount - else: - negative_count += 1 - negative_total += abs(amount) - if len(sample_credits) < 10: - sample_credits.append((desc.strip(), amount)) - except Exception: - return None +def _format_amount_distribution_line(count, sign_label, total): + """Format an amount distribution line with correct singular/plural wording.""" + noun = 'amount' if count == 1 else 'amounts' + return f"{count} {sign_label} {noun}, totaling ${total:,.2f}" - total_count = positive_count + negative_count - if total_count == 0: - return None - # Determine sign convention based on distribution - # Expenses positive: mostly positive amounts (typical credit card export) - # Expenses negative: mostly negative amounts (typical bank export) - positive_pct = positive_count / total_count * 100 - - if positive_pct > 70: - sign_convention = 'expenses_positive' - suggest_negate = False - rationale = "mostly positive amounts (expenses are positive)" - elif positive_pct < 30: - sign_convention = 'expenses_negative' - suggest_negate = True - rationale = "mostly negative amounts (expenses are negative)" - else: - # Mixed - harder to tell - if positive_total > negative_total: - sign_convention = 'expenses_positive' - suggest_negate = False - rationale = "total positive exceeds negative" - else: - sign_convention = 'expenses_negative' - suggest_negate = True - rationale = "total negative exceeds positive" +def _describe_amount_token_usage(analysis): + """Explain how {amount} and {-amount} relate to the observed signs.""" + if not analysis: + return ['Replace in the format string template with the option you choose.'] - return { - 'positive_count': positive_count, - 'negative_count': negative_count, - 'positive_total': positive_total, - 'negative_total': negative_total, - 'positive_pct': positive_pct, - 'sign_convention': sign_convention, - 'suggest_negate': suggest_negate, - 'rationale': rationale, - 'sample_credits': sample_credits, - } + return [ + "Use {amount} to keep amounts as they appear in the CSV.", + "Use {-amount} to flip the sign of amounts from the CSV.", + "Use {+amount} if you want all parsed amounts to be positive.", + 'Replace in the format string template with the option you choose.', + ] def _analyze_columns(filepath, has_header=True, max_rows=100, dialect=None): diff --git a/tests/test_inspect.py b/tests/test_inspect.py index bd3ce85..6de6ce2 100644 --- a/tests/test_inspect.py +++ b/tests/test_inspect.py @@ -1,5 +1,6 @@ """Tests for inspect command - CSV sniffing and column analysis.""" +from argparse import Namespace import csv import pytest import tempfile @@ -10,6 +11,7 @@ _analyze_columns, _analyze_amount_column_detailed, _detect_currency_symbol, + cmd_inspect, ) @@ -948,3 +950,60 @@ def test_tab_delimiter(self): assert spec.delimiter == '\t' finally: os.unlink(tmpfile) + + +class TestCmdInspect: + """Regression tests for inspect command amount guidance.""" + + def test_reports_negative_amount_observations(self, capsys): + """Exports with mostly negative amounts should report the observed sign counts.""" + csv_content = """Date,Description,Amount +01/15/2025,GROCERY STORE,-123.45 +01/16/2025,PAYROLL,2500.00 +01/17/2025,COFFEE SHOP,-5.99 +01/18/2025,RENT,-1200.00 +""" + with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f: + f.write(csv_content) + tmpfile = f.name + + try: + cmd_inspect(Namespace(file=tmpfile, rows=3)) + captured = capsys.readouterr() + assert 'Suggested format string template:' in captured.out + assert 'format: "{date:%m/%d/%Y}, {description}, "' in captured.out + assert '1 positive amount, totaling $2,500.00' in captured.out + assert '3 negative amounts, totaling $1,329.44' in captured.out + assert 'Observed 1 positive and 3 negative non-zero amounts in the sampled rows.' in captured.out + assert "Use {amount} to keep amounts as they appear in the CSV." in captured.out + assert "Use {-amount} to flip the sign of amounts from the CSV." in captured.out + assert "Use {+amount} if you want all parsed amounts to be positive." in captured.out + assert "Replace in the format string template with the option you choose." in captured.out + finally: + os.unlink(tmpfile) + + def test_reports_positive_amount_observations(self, capsys): + """Exports with mostly positive amounts should report the observed sign counts.""" + csv_content = """Date,Description,Amount +01/15/2025,GROCERY STORE,123.45 +01/16/2025,COFFEE SHOP,5.99 +01/17/2025,GAS STATION,45.00 +01/18/2025,PAYMENT,-500.00 +""" + with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f: + f.write(csv_content) + tmpfile = f.name + + try: + cmd_inspect(Namespace(file=tmpfile, rows=4)) + captured = capsys.readouterr() + assert 'format: "{date:%m/%d/%Y}, {description}, "' in captured.out + assert '3 positive amounts, totaling $174.44' in captured.out + assert '1 negative amount, totaling $500.00' in captured.out + assert 'Observed 3 positive and 1 negative non-zero amounts in the sampled rows.' in captured.out + assert "Use {amount} to keep amounts as they appear in the CSV." in captured.out + assert "Use {-amount} to flip the sign of amounts from the CSV." in captured.out + assert "Use {+amount} if you want all parsed amounts to be positive." in captured.out + assert "Replace in the format string template with the option you choose." in captured.out + finally: + os.unlink(tmpfile)