/* * Copyright (C) 2017-2020 The Project X-Ray Authors. * * Use of this source code is governed by a ISC-style * license that can be found in the LICENSE file or at * https://opensource.org/licenses/ISC * * SPDX-License-Identifier: ISC */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include DEFINE_int32( c, 4, "threshold under which candidates are output. set to -1 to output all."); DEFINE_bool(i, false, "add inverted tags"); DEFINE_int32(m, 0, "min number of set/cleared samples each"); DEFINE_int32(M, 0, "min number of set/cleared samples total"); DEFINE_string(o, "", "set output file"); DEFINE_string(k, "", "set output mask file"); using std::map; using std::string; using std::tuple; using std::vector; int num_bits = 0, num_tags = 0; map bit_ids, tag_ids; vector bit_ids_r, tag_ids_r; #if 0 struct bool_vec { vector data; bool_vec(int n = 0, bool initval = false) : data(n, initval) { } void set(int n) { if (int(data.size()) <= n) data.resize(n+1); data[n] = true; } bool get(int n) { return data.at(n); } void resize(int n) { data.resize(n); } int count() { return std::accumulate(data.begin(), data.end(), 0); } void apply_and(const bool_vec &other) { assert(data.size() == other.data.size()); for (int i = 0; i < int(data.size()); i++) data[i] = data[i] && other.data[i]; } void apply_andc(const bool_vec &other) { assert(data.size() == other.data.size()); for (int i = 0; i < int(data.size()); i++) data[i] = data[i] && !other.data[i]; } }; #else struct bool_vec { vector data; bool_vec(int n = 0, bool initval = false) : data((n + 63) / 64, initval ? ~uint64_t(0) : uint64_t(0)) { for (int i = data.size() * 64 - 1; i >= n; i--) data[n / 64] &= ~(uint64_t(1) << (n % 64)); } void set(int n) { if (int(data.size() * 64) <= n) data.resize((n + 64) / 64); data[n / 64] |= uint64_t(1) << (n % 64); } bool get(int n) { return (data[n / 64] >> (n % 64)) & 1; } void resize(int n) { data.resize((n + 63) / 64); } int count() { int sum = 0; for (int i = 0; i < 64 * int(data.size()); i++) if (get(i)) sum++; return sum; } void apply_and(const bool_vec& other) { assert(data.size() == other.data.size()); for (int i = 0; i < int(data.size()); i++) data[i] &= other.data[i]; } void apply_andc(const bool_vec& other) { assert(data.size() == other.data.size()); for (int i = 0; i < int(data.size()); i++) data[i] &= ~other.data[i]; } }; #endif // segname -> bits, tags_on, tags_off typedef tuple segdata_t; map segdata; map segnamecnt; static inline bool_vec& segdata_bits(segdata_t& sd) { return std::get<0>(sd); } static inline bool_vec& segdata_tags1(segdata_t& sd) { return std::get<1>(sd); } static inline bool_vec& segdata_tags0(segdata_t& sd) { return std::get<2>(sd); } void read_input(std::istream& f, std::string filename) { string token; segdata_t* segptr = nullptr; while (f >> token) { if (token == "seg") { f >> token; token = filename + ":" + token; while (segdata.count(token)) { int idx = 1; if (segnamecnt.count(token)) idx = segnamecnt.at(token); segnamecnt[token] = idx + 1; char buffer[64]; snprintf(buffer, 64, "-%d", idx); token += buffer; } segptr = &segdata[token]; continue; } if (token == "bit") { assert(segptr != nullptr); f >> token; if (bit_ids.count(token) == 0) { bit_ids[token] = num_bits++; bit_ids_r.push_back(token); } int bit_idx = bit_ids.at(token); auto& bits = segdata_bits(*segptr); bits.set(bit_idx); continue; } if (token == "tag") { assert(segptr != nullptr); f >> token; if (tag_ids.count(token) == 0) { tag_ids[token] = num_tags++; tag_ids_r.push_back(token); } int tag_idx = tag_ids.at(token); f >> token; assert(token == "0" || token == "1"); auto& tags = token == "1" ? segdata_tags1(*segptr) : segdata_tags0(*segptr); tags.set(tag_idx); if (FLAGS_i) { auto& inv_tags = token == "1" ? segdata_tags0(*segptr) : segdata_tags1(*segptr); token = tag_ids_r.at(tag_idx) + "__INV"; if (tag_ids.count(token) == 0) { tag_ids[token] = num_tags++; tag_ids_r.push_back(token); } int inv_tag_idx = tag_ids.at(token); inv_tags.set(inv_tag_idx); } continue; } abort(); } // printf("Number of segments: %d\n", int(segdata.size())); // printf("Number of bits: %d\n", num_bits); // printf("Number of tags: %d\n", num_tags); for (auto& segdat : segdata) { segdata_bits(segdat.second).resize(num_bits); segdata_tags1(segdat.second).resize(num_tags); segdata_tags0(segdat.second).resize(num_tags); } } int main(int argc, char** argv) { gflags::SetUsageMessage( absl::StrCat("Usage: ", argv[0], " [options] file..")); gflags::ParseCommandLineFlags(&argc, &argv, true); if (argc > 1) { for (int optind = 1; optind < argc; optind++) { printf("Reading %s.\n", argv[optind]); std::ifstream f; f.open(argv[optind]); // Check if input file exists. if (!f.good()) { printf("ERROR: Input file does not exist!\n"); return -1; } assert(!f.fail()); read_input(f, argv[optind]); } } else { printf("Reading from stdin...\n"); read_input(std::cin, "stdin"); } printf("#of segments: %d\n", int(segdata.size())); printf("#of bits: %d\n", num_bits); printf("#of tags: %d\n", num_tags); FILE* f = stdout; if (!FLAGS_o.empty()) { f = fopen(FLAGS_o.c_str(), "w"); assert(f != nullptr); } int cnt_const0 = 0; int cnt_const1 = 0; int cnt_candidates = 0; int min_candidates = num_bits; int max_candidates = 0; float avg_candidates = 0; std::vector out_lines; for (int tag_idx = 0; tag_idx < num_tags; tag_idx++) { bool_vec mask(num_bits, true); int count1 = 0, count0 = 0; for (auto& segdat : segdata) { auto& sd = segdat.second; bool tag1 = segdata_tags1(sd).get(tag_idx); bool tag0 = segdata_tags0(sd).get(tag_idx); assert(!tag1 || !tag0); if (tag1) { count1++; mask.apply_and(segdata_bits(sd)); continue; } if (tag0) { count0++; mask.apply_andc(segdata_bits(sd)); continue; } } assert(count1 || count0); std::string out_line = tag_ids_r.at(tag_idx); if (count1 < FLAGS_m) { char buffer[64]; snprintf(buffer, 64, " ", count1); out_line += buffer; } if (count0 < FLAGS_m) { char buffer[64]; snprintf(buffer, 64, " ", count0); out_line += buffer; } if (count1 + count0 < FLAGS_M) { char buffer[64]; snprintf(buffer, 64, " ", count1, count0); out_line += buffer; } if (!count1) { out_lines.push_back(out_line + " "); cnt_const0 += 1; continue; } if (!count0) { out_line += " "; cnt_const1 += 1; } int num_candidates = mask.count(); if (count0) { min_candidates = std::min(min_candidates, num_candidates); max_candidates = std::max(max_candidates, num_candidates); avg_candidates += num_candidates; cnt_candidates += 1; } if (FLAGS_c < 0 || (0 < num_candidates && num_candidates <= FLAGS_c)) { std::vector out_tags; for (int bit_idx = 0; bit_idx < num_bits; bit_idx++) if (mask.get(bit_idx)) out_tags.push_back( bit_ids_r.at(bit_idx)); std::sort(out_tags.begin(), out_tags.end()); for (auto& tag : out_tags) out_line += " " + tag; } else { char buffer[64]; snprintf(buffer, 64, " <%d candidates>", num_candidates); out_line += buffer; } out_lines.push_back(out_line); } std::sort(out_lines.begin(), out_lines.end()); for (auto& line : out_lines) fprintf(f, "%s\n", line.c_str()); if (cnt_candidates) avg_candidates /= cnt_candidates; printf("#of const0 tags: %d\n", cnt_const0); printf("#of const1 tags: %d\n", cnt_const1); if (cnt_candidates) { printf("min #of candidates: %d\n", min_candidates); printf("max #of candidates: %d\n", max_candidates); printf("avg #of candidates: %.3f\n", avg_candidates); } if (!FLAGS_k.empty()) { f = fopen(FLAGS_k.c_str(), "w"); assert(f != nullptr); std::sort(bit_ids_r.begin(), bit_ids_r.end()); for (auto bit : bit_ids_r) fprintf(f, "bit %s\n", bit.c_str()); } return 0; }