-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.cpp
More file actions
132 lines (125 loc) · 4.73 KB
/
main.cpp
File metadata and controls
132 lines (125 loc) · 4.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#include <iostream>
#include <thread>
#include <string>
#include <algorithm>
#include <vector>
#include <map>
#include <set>
#include <memory>
#include <mutex>
#include <numeric>
#include "read_config.hpp"
#include "measure_time.hpp"
#include "read_from_file.hpp"
#include "boundary_analysis.hpp"
#include "count_token_usage.hpp"
// (token, usage count) entry used when sorting the final word index.
using pair = std::pair<std::string, size_t>;
int main(int argc, char **argv) {
// help info
if (argc == 2 && std::string(argv[1]) == "--help") {
std::cout << "Description\n" <<
"$ ./parallel_indexing <path_to_config_file>\n";
return 0;
}
std::string filename("config.dat");
// user's config file
if (argc == 2) {
filename = std::string(argv[1]);
}
std::ifstream config_stream(filename);
if (!config_stream.is_open()) {
std::cerr << "Failed to open configuration file " << filename << std::endl;
return 1;
}
config_data_t conf_data;
try {
read_config_data(config_stream, conf_data);
} catch (std::exception &e) {
std::cerr << e.what() << std::endl;
return 2;
}
// check output files
std::ofstream output_alphabet(conf_data.output_alphabet_order);
if (!config_stream.is_open()) {
std::cerr << "Failed to open file for alphabet order result" << std::endl;
return 1;
}
std::ofstream output_count(conf_data.output_count_order);
if (!config_stream.is_open()) {
std::cerr << "Failed to open file for count order result " << std::endl;
return 1;
}
#ifdef DEBUG
std::cout << "Input filename " << conf_data.input_file_name << "." << std::endl;
std::cout << "Output alphabet order filename " << conf_data.output_alphabet_order << "." << std::endl;
std::cout << "Output count order filename " << conf_data.output_count_order << "." << std::endl;
std::cout << "Thread num to utilize " << conf_data.thread_num << "." << std::endl;
#endif
auto start_reading = get_current_time_fenced();
std::vector<std::string> file_data;
get_file_content(file_data, conf_data.input_file_name);
auto finish_reading = get_current_time_fenced();
#ifdef DEBUG
for (const auto &v: file_data) {
std::cout << v << std::endl;
}
#endif
std::vector<std::thread> thread_list;
thread_list.reserve(conf_data.thread_num);
std::vector<std::unique_ptr<std::map<std::string, size_t>>> list;
std::mutex list_mtx;
auto start_counting = get_current_time_fenced();
// create set with tokens from input text file(s)
std::vector<std::string> tokens_list;
parse(file_data, tokens_list);
#ifdef DEBUG
for (const auto & v: tokens_list) {
std::cout << v << std::endl;
}
#endif
for (size_t i = 0; i < conf_data.thread_num; ++i) {
thread_list.emplace_back(token_usage,std::ref(file_data), std::ref(tokens_list), std::ref(list),
i, conf_data.thread_num, std::ref(list_mtx));
}
for (auto &v: thread_list) v.join();
auto finish_counting = get_current_time_fenced();
// merge all maps in first
std::map<std::string, size_t> &final_map = *list[0];
std::for_each(list.begin() + 1, list.end(), [&](const std::unique_ptr<std::map<std::string, size_t>> &a) {
for (const auto &v: *a) list[0]->operator[](v.first) += v.second;
});
// remove merged maps
list.erase(list.begin() + 1, list.end());
#ifdef DEBUG
for (auto &v: final_map) {
std::cout << v.first << ": " << v.second << std::endl;
}
#endif
// create a empty vector of pairs
std::vector<std::pair<std::string, size_t>> sort_container(final_map.size());
// copy key-value pairs from the map to the vector
std::copy(final_map.begin(), final_map.end(), sort_container.begin());
// sort the pair by alphabet
std::sort(sort_container.begin(), sort_container.end(),
[](const pair &l, const pair &r) {
return l.first < r.first;
});
// write to output file
for (auto &v: sort_container) {
output_alphabet << v.first << ": " << v.second << std::endl;
}
// sort by usage count
std::sort(sort_container.begin(), sort_container.end(),
[](const pair &l, const pair &r) {
return l.second > r.second;
});
// write to output file
for (auto &v: sort_container) {
output_count << v.first << ": " << v.second << std::endl;
}
auto total_finish = get_current_time_fenced();
std::cout << "Total time: " << to_us(total_finish - start_reading) / 1000000.0 << std::endl;
std::cout << "Reading time: " << to_us(finish_reading - start_reading) / 1000000.0 << std::endl;
std::cout << "Counting time: " << to_us(finish_counting - start_counting) / 1000000.0 << std::endl;
return 0;
}