Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 45 additions & 139 deletions src/tally/commands/inspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,9 @@ def cmd_inspect(args):
if spec.delimiter:
print(f" - Delimiter: {repr(spec.delimiter)}")

# Build suggested format string
analysis = _analyze_amount_column_detailed(filepath, spec.amount_column, has_header=True, dialect=dialect)

# Build suggested format string template
max_col = max(spec.date_column, spec.description_column, spec.amount_column)

cols = []
Expand All @@ -164,24 +166,23 @@ def cmd_inspect(args):
elif i == spec.description_column:
cols.append('{description}')
elif i == spec.amount_column:
cols.append('{amount}')
cols.append('<amount token>')
else:
cols.append('{_}')

format_str = ', '.join(cols)
print(f"\n Suggested format string:")
print(f"\n Suggested format string template:")
print(f' format: "{format_str}"')
if spec.delimiter:
print(f' delimiter: "{spec.delimiter}"')

# Analyze amount patterns - detailed analysis with both signs
analysis = _analyze_amount_column_detailed(filepath, spec.amount_column, has_header=True, dialect=dialect)
if analysis:
print("\n" + "=" * 70)
print("Amount Distribution:")
print("-" * 70)
print(f" {analysis['positive_count']} positive amounts, totaling ${analysis['positive_total']:,.2f}")
print(f" {analysis['negative_count']} negative amounts, totaling ${analysis['negative_total']:,.2f}")
print(" " + _format_amount_distribution_line(analysis['positive_count'], 'positive', analysis['positive_total']))
print(" " + _format_amount_distribution_line(analysis['negative_count'], 'negative', analysis['negative_total']))

# Show format observations
if analysis['format_observations']:
Expand All @@ -203,11 +204,18 @@ def cmd_inspect(args):
truncated = desc[:45] + '...' if len(desc) > 45 else desc
print(f" -${abs(amt):,.2f} {truncated}")

# Show amount modifier options (not recommendations)
print("\n Amount modifiers available:")
print(f" {{amount}} - use values as-is")
print(f" {{-amount}} - negate (flip sign)")
print(f" {{+amount}} - absolute value")
print("\n Sign observations:")
for line in _describe_amount_sign_observations(analysis):
print(f" - {line}")

# Show amount modifier options with context
print("\n How to use amount tokens:")
print(f" {{amount}} - use amounts as they appear in the CSV")
print(f" {{-amount}} - flip the sign from the CSV")
print(f" {{+amount}} - make all amounts positive")

for line in _describe_amount_token_usage(analysis):
print(f" - {line}")

# Detect currency symbol from amount column
currency_symbol = _detect_currency_symbol(filepath, spec.amount_column, has_header=True, dialect=dialect)
Expand Down Expand Up @@ -317,141 +325,39 @@ def _detect_file_format(filepath):
return result


def _analyze_amount_patterns(filepath, amount_col, has_header=True, delimiter=None, max_rows=1000):
"""
Analyze amount column patterns to help users understand their data's sign convention.
def _describe_amount_sign_observations(analysis):
"""Describe what the sampled amount signs look like."""
if not analysis:
return ["No non-zero amounts were found in the sampled rows."]

Returns dict with:
- positive_count: number of positive amounts
- negative_count: number of negative amounts
- positive_total: sum of positive amounts
- negative_total: sum of negative amounts (as positive number)
- sign_convention: 'expenses_positive' or 'expenses_negative'
- suggest_negate: True if user should use {-amount} to normalize
- sample_credits: list of (description, amount) for likely transfers/income
"""
import re as re_mod
positive_count = analysis['positive_count']
negative_count = analysis['negative_count']
total_count = positive_count + negative_count
if total_count == 0:
return ["No non-zero amounts were found in the sampled rows."]

positive_count = 0
negative_count = 0
positive_total = 0.0
negative_total = 0.0
sample_credits = [] # (description, amount) tuples
return [
f"Observed {positive_count} positive and {negative_count} negative non-zero amounts in the sampled rows.",
]

def parse_amount(val):
"""Parse amount string to float, handling currency symbols and parentheses."""
if not val:
return None
val = val.strip()
# Remove currency symbols, commas
val = re_mod.sub(r'[$€£¥,]', '', val)
# Handle parentheses as negative
if val.startswith('(') and val.endswith(')'):
val = '-' + val[1:-1]
try:
return float(val)
except ValueError:
return None

try:
with open(filepath, 'r', encoding='utf-8') as f:
if delimiter and delimiter.startswith('regex:'):
# Regex-based parsing
pattern = re_mod.compile(delimiter[6:])
for i, line in enumerate(f):
if has_header and i == 0:
continue
if i >= max_rows:
break
line = line.strip()
if not line:
continue
match = pattern.match(line)
if match:
groups = match.groups()
if amount_col < len(groups):
amount = parse_amount(groups[amount_col])
if amount is not None:
desc = groups[1] if len(groups) > 1 else ''
if amount >= 0:
positive_count += 1
positive_total += amount
else:
negative_count += 1
negative_total += abs(amount)
if len(sample_credits) < 10:
sample_credits.append((desc.strip(), amount))
else:
# Standard CSV
reader = csv.reader(f)
if has_header:
headers = next(reader, None)
desc_col = 1 # default
for idx, h in enumerate(headers or []):
hl = h.lower()
if 'desc' in hl or 'merchant' in hl or 'payee' in hl or 'name' in hl:
desc_col = idx
break
else:
desc_col = 1

for i, row in enumerate(reader):
if i >= max_rows:
break
if amount_col < len(row):
amount = parse_amount(row[amount_col])
if amount is not None:
desc = row[desc_col] if desc_col < len(row) else ''
if amount >= 0:
positive_count += 1
positive_total += amount
else:
negative_count += 1
negative_total += abs(amount)
if len(sample_credits) < 10:
sample_credits.append((desc.strip(), amount))
except Exception:
return None
def _format_amount_distribution_line(count, sign_label, total):
"""Format an amount distribution line with correct singular/plural wording."""
noun = 'amount' if count == 1 else 'amounts'
return f"{count} {sign_label} {noun}, totaling ${total:,.2f}"

total_count = positive_count + negative_count
if total_count == 0:
return None

# Determine sign convention based on distribution
# Expenses positive: mostly positive amounts (typical credit card export)
# Expenses negative: mostly negative amounts (typical bank export)
positive_pct = positive_count / total_count * 100

if positive_pct > 70:
sign_convention = 'expenses_positive'
suggest_negate = False
rationale = "mostly positive amounts (expenses are positive)"
elif positive_pct < 30:
sign_convention = 'expenses_negative'
suggest_negate = True
rationale = "mostly negative amounts (expenses are negative)"
else:
# Mixed - harder to tell
if positive_total > negative_total:
sign_convention = 'expenses_positive'
suggest_negate = False
rationale = "total positive exceeds negative"
else:
sign_convention = 'expenses_negative'
suggest_negate = True
rationale = "total negative exceeds positive"
def _describe_amount_token_usage(analysis):
"""Explain how {amount} and {-amount} relate to the observed signs."""
if not analysis:
return ['Replace <amount token> in the format string template with the option you choose.']

return {
'positive_count': positive_count,
'negative_count': negative_count,
'positive_total': positive_total,
'negative_total': negative_total,
'positive_pct': positive_pct,
'sign_convention': sign_convention,
'suggest_negate': suggest_negate,
'rationale': rationale,
'sample_credits': sample_credits,
}
return [
"Use {amount} to keep amounts as they appear in the CSV.",
"Use {-amount} to flip the sign of amounts from the CSV.",
"Use {+amount} if you want all parsed amounts to be positive.",
'Replace <amount token> in the format string template with the option you choose.',
]


def _analyze_columns(filepath, has_header=True, max_rows=100, dialect=None):
Expand Down
59 changes: 59 additions & 0 deletions tests/test_inspect.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Tests for inspect command - CSV sniffing and column analysis."""

from argparse import Namespace
import csv
import pytest
import tempfile
Expand All @@ -10,6 +11,7 @@
_analyze_columns,
_analyze_amount_column_detailed,
_detect_currency_symbol,
cmd_inspect,
)


Expand Down Expand Up @@ -948,3 +950,60 @@ def test_tab_delimiter(self):
assert spec.delimiter == '\t'
finally:
os.unlink(tmpfile)


class TestCmdInspect:
    """Regression tests for inspect command amount guidance.

    Each test writes a small three-column CSV (Date, Description, Amount) to
    a temporary file, runs ``cmd_inspect`` on it, and checks that the captured
    stdout contains the expected format template, the amount distribution
    lines, the sign observation and the amount-token usage hints.
    """

    def test_reports_negative_amount_observations(self, capsys):
        """Exports with mostly negative amounts should report the observed sign counts."""
        csv_content = """Date,Description,Amount
01/15/2025,GROCERY STORE,-123.45
01/16/2025,PAYROLL,2500.00
01/17/2025,COFFEE SHOP,-5.99
01/18/2025,RENT,-1200.00
"""
        # Checked in this order; every fragment must appear in stdout.
        expected_fragments = (
            'Suggested format string template:',
            'format: "{date:%m/%d/%Y}, {description}, <amount token>"',
            '1 positive amount, totaling $2,500.00',
            '3 negative amounts, totaling $1,329.44',
            'Observed 1 positive and 3 negative non-zero amounts in the sampled rows.',
            "Use {amount} to keep amounts as they appear in the CSV.",
            "Use {-amount} to flip the sign of amounts from the CSV.",
            "Use {+amount} if you want all parsed amounts to be positive.",
            "Replace <amount token> in the format string template with the option you choose.",
        )

        # delete=False so the file survives the `with` and can be reopened by
        # the command; it is removed explicitly in the `finally` below.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as handle:
            handle.write(csv_content)
            csv_path = handle.name

        try:
            cmd_inspect(Namespace(file=csv_path, rows=3))
            output = capsys.readouterr().out
            for fragment in expected_fragments:
                assert fragment in output
        finally:
            os.unlink(csv_path)

    def test_reports_positive_amount_observations(self, capsys):
        """Exports with mostly positive amounts should report the observed sign counts."""
        csv_content = """Date,Description,Amount
01/15/2025,GROCERY STORE,123.45
01/16/2025,COFFEE SHOP,5.99
01/17/2025,GAS STATION,45.00
01/18/2025,PAYMENT,-500.00
"""
        # Checked in this order; every fragment must appear in stdout.
        expected_fragments = (
            'format: "{date:%m/%d/%Y}, {description}, <amount token>"',
            '3 positive amounts, totaling $174.44',
            '1 negative amount, totaling $500.00',
            'Observed 3 positive and 1 negative non-zero amounts in the sampled rows.',
            "Use {amount} to keep amounts as they appear in the CSV.",
            "Use {-amount} to flip the sign of amounts from the CSV.",
            "Use {+amount} if you want all parsed amounts to be positive.",
            "Replace <amount token> in the format string template with the option you choose.",
        )

        # delete=False so the file survives the `with` and can be reopened by
        # the command; it is removed explicitly in the `finally` below.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as handle:
            handle.write(csv_content)
            csv_path = handle.name

        try:
            cmd_inspect(Namespace(file=csv_path, rows=4))
            output = capsys.readouterr().out
            for fragment in expected_fragments:
                assert fragment in output
        finally:
            os.unlink(csv_path)