-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcode_stats.py
More file actions
152 lines (128 loc) · 4.87 KB
/
code_stats.py
File metadata and controls
152 lines (128 loc) · 4.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import os
import re
from collections import Counter
def extract_words(text):
    """Extract English words from text, splitting identifiers into parts.

    Identifiers are split on camelCase boundaries, acronym runs, digits and
    underscores, so ``parseHTTPResponse`` yields ``parse``, ``http``,
    ``response`` and ``MAX_VALUE`` yields ``max``, ``value``.

    Args:
        text: Arbitrary source text to scan.

    Returns:
        A list of lower-cased words in order of appearance. Digit runs are
        discarded, and a capital letter that is neither followed by a
        lower-case letter nor part of a matched acronym run is skipped.
    """
    # Three alternatives, tried in order:
    #   1. an optionally capitalised lower-case run (``foo``, ``Foo``);
    #   2. a run of 2+ capitals that ends right before a capitalised word,
    #      a digit, an underscore, any other non-word character, or the end
    #      of the text;
    #   3. a digit run, matched only so that it can be filtered out below.
    # The underscore is listed explicitly because \W does not cover it;
    # without it the leading parts of SCREAMING_SNAKE_CASE names are lost.
    pattern = r'[A-Z]?[a-z]+|[A-Z]{2,}(?=[A-Z][a-z]|[\W\d_]|$)|\d+'
    return [
        token.lower()
        for token in re.findall(pattern, text)
        if not token.isdigit()
    ]
def _scan_java_line(line, in_block_comment):
    """Classify one stripped, non-empty source line.

    Args:
        line: The line with surrounding whitespace already removed.
        in_block_comment: True when a ``/* ... */`` comment opened on an
            earlier line is still open at the start of this line.

    Returns:
        A ``(has_code, in_block_comment)`` tuple: whether any character of
        the line lies outside comments, and whether a block comment is
        still open at the end of the line.
    """
    has_code = False
    pos = 0
    length = len(line)
    while pos < length:
        if in_block_comment:
            close_at = line.find('*/', pos)
            if close_at == -1:
                break  # the comment continues on the next line
            in_block_comment = False
            pos = close_at + 2
            continue

        char = line[pos]
        if char.isspace():
            pos += 1
            continue

        pair = line[pos:pos + 2]
        if pair == '//':
            break  # the rest of the line is a line comment
        if pair == '/*':
            in_block_comment = True
            pos += 2
            continue

        has_code = True
        if char in '"\'':
            # Skip a string/char literal so that comment markers inside it
            # (e.g. "image/*" or "*/*") are not mistaken for comments.
            pos += 1
            while pos < length and line[pos] != char:
                pos += 2 if line[pos] == '\\' else 1
            pos += 1  # step over the closing quote
            continue
        pos += 1
    return has_code, in_block_comment


def count_code_lines(file_path):
    """Count total, code, comment and empty lines of one file, plus words.

    A line counts as a comment line only when it holds no code outside
    ``//`` and ``/* ... */`` comments, so a statement followed by a trailing
    comment is a code line. Comment markers inside string or character
    literals are ignored. Whitespace-only lines count as empty, including
    those inside a block comment.

    Args:
        file_path: Path of the source file; it is read as UTF-8.

    Returns:
        A dict with the integer counts ``total``, ``code``, ``comment`` and
        ``empty``, and ``words``: a ``Counter`` of the words that
        ``extract_words`` finds in the whole file content.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    lines = content.splitlines()
    empty_lines = 0
    comment_lines = 0
    in_block_comment = False
    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            empty_lines += 1
            continue
        has_code, in_block_comment = _scan_java_line(line, in_block_comment)
        if not has_code:
            comment_lines += 1

    total_lines = len(lines)
    return {
        'total': total_lines,
        'code': total_lines - empty_lines - comment_lines,
        'comment': comment_lines,
        'empty': empty_lines,
        'words': Counter(extract_words(content)),
    }
def scan_directory(directory):
    """Walk a directory tree and aggregate statistics for its Java files.

    Every file whose name ends with ``.java`` is passed to
    ``count_code_lines``. A file that raises during processing is reported
    on standard output and skipped.

    Args:
        directory: Root directory to walk recursively.

    Returns:
        A dict with the summed counters ``files``, ``total_lines``,
        ``code_lines``, ``comment_lines`` and ``empty_lines``, the merged
        ``word_stats`` Counter, and ``file_details``: a list of
        ``{'path': <path relative to directory>, 'stats': <per-file dict>}``.
    """
    # (aggregate key, per-file key) pairs, summed in this order.
    summed_keys = (
        ('total_lines', 'total'),
        ('code_lines', 'code'),
        ('comment_lines', 'comment'),
        ('empty_lines', 'empty'),
    )
    stats = {
        'files': 0,
        'total_lines': 0,
        'code_lines': 0,
        'comment_lines': 0,
        'empty_lines': 0,
        'word_stats': Counter(),
        'file_details': [],
    }

    for current_dir, _subdirs, names in os.walk(directory):
        for name in names:
            if not name.endswith('.java'):
                continue

            file_path = os.path.join(current_dir, name)
            relative_path = os.path.relpath(file_path, directory)
            try:
                file_stats = count_code_lines(file_path)
                stats['files'] += 1
                for aggregate_key, file_key in summed_keys:
                    stats[aggregate_key] += file_stats[file_key]
                stats['word_stats'].update(file_stats['words'])
                stats['file_details'].append(
                    {'path': relative_path, 'stats': file_stats}
                )
            except Exception as e:
                # Best effort: report the failing file and keep scanning.
                print(f"Error processing {file_path}: {str(e)}")

    return stats
def generate_report(stats):
    """Render the aggregated statistics as a plain-text report.

    The report has a title, an overall-statistics section and a file list
    sorted by code-line count in descending order. Only aggregate word
    counts are emitted; there is no per-word frequency listing.

    Args:
        stats: A dict with the keys ``files``, ``total_lines``,
            ``code_lines``, ``comment_lines``, ``empty_lines``,
            ``word_stats`` (a mapping of word to count) and
            ``file_details`` (entries with ``path`` and ``stats['code']``).

    Returns:
        The report as one newline-joined string.
    """
    rule = "-" * 50

    title = ["代码统计报告", "=" * 50]

    # Overall statistics; the leading "\n" puts a blank line above the heading.
    overview = [
        "\n总体统计:",
        rule,
        f"文件总数: {stats['files']}",
        f"总行数: {stats['total_lines']}",
        f"代码行数: {stats['code_lines']}",
        f"注释行数: {stats['comment_lines']}",
        f"空行数: {stats['empty_lines']}",
        f"总单词数: {sum(stats['word_stats'].values())}",
        f"不同单词数: {len(stats['word_stats'])}",
    ]

    # File list, largest code-line count first.
    largest_first = sorted(
        stats['file_details'],
        key=lambda entry: entry['stats']['code'],
        reverse=True,
    )
    listing = ["\n文件列表:", rule]
    listing.extend(
        f"{entry['path']} ({entry['stats']['code']} 行代码)"
        for entry in largest_first
    )

    return "\n".join(title + overview + listing)
def main(java_dir="app/src/main/java", report_path='code_stats_report.txt'):
    """Scan a Java source tree, then write and print the statistics report.

    Args:
        java_dir: Directory containing the Java sources to analyse.
        report_path: File the report is written to, encoded as UTF-8.

    Returns:
        None. When ``java_dir`` is not an existing directory a message is
        printed and nothing is scanned or written.
    """
    # isdir rather than exists: a regular file at this path cannot be
    # walked and would otherwise produce an empty report.
    if not os.path.isdir(java_dir):
        print(f"目录不存在: {java_dir}")
        return

    print(f"正在统计目录: {java_dir}")
    stats = scan_directory(java_dir)
    report = generate_report(stats)

    # Save the report to a file, then echo it to the console.
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(report)
    print(f"\n报告已生成到 {report_path}")
    print(report)
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()