diff --git a/experiments/intpips/.gitignore b/experiments/intpips/.gitignore index 932efba0..6fb5047a 100644 --- a/experiments/intpips/.gitignore +++ b/experiments/intpips/.gitignore @@ -1,2 +1,2 @@ /specimen_[0-9][0-9][0-9]/ -/seg_clbl[lm].segbits +/seg_int.segbits diff --git a/experiments/intpips/Makefile b/experiments/intpips/Makefile index d2111cbc..ec76de7f 100644 --- a/experiments/intpips/Makefile +++ b/experiments/intpips/Makefile @@ -1,19 +1,16 @@ -N := 5 +N := 15 SPECIMENS := $(addprefix specimen_,$(shell seq -f '%03.0f' $(N))) SPECIMENS_OK := $(addsuffix /OK,$(SPECIMENS)) -database: database/clbll database/clblm +database: $(SPECIMENS_OK) + ../../tools/segmatch -m 5 -M 15 -o seg_int.segbits \ + $(addsuffix /segdata_clbll.txt,$(SPECIMENS)) \ + $(addsuffix /segdata_clblm.txt,$(SPECIMENS)) -pushdb: pushdb/clbll pushdb/clblm - -database/%: $(SPECIMENS_OK) - ../../tools/segmatch -o seg_$(notdir $@).segbits \ - $(addsuffix /segdata_$(notdir $@).txt,$(SPECIMENS)) - -pushdb/%: - bash ../../utils/mergedb.sh seg_$(notdir $@).segbits \ - ../../database/$(XRAY_DATABASE)/seg_$(notdir $@).segbits +pushdb: + bash ../../utils/mergedb.sh seg_int.segbits \ + ../../database/$(XRAY_DATABASE)/seg_int.segbits $(SPECIMENS_OK): bash generate.sh $(subst /OK,,$@) diff --git a/experiments/intpips/generate.py b/experiments/intpips/generate.py index 495a3709..64552f57 100644 --- a/experiments/intpips/generate.py +++ b/experiments/intpips/generate.py @@ -41,9 +41,9 @@ for tile, pips_srcs_dsts in tiledata.items(): for pip, src_dst in pipdata.items(): if pip in pips: - segmk.addtag(tile, pip, 1) + segmk.addtag(tile, "%s.%s" % (src_dst[1], src_dst[0]), 1) elif src_dst[1] not in dsts: - segmk.addtag(tile, pip, 0) + segmk.addtag(tile, "%s.%s" % (src_dst[1], src_dst[0]), 0) segmk.compile() segmk.write() diff --git a/tools/segmatch.cc b/tools/segmatch.cc index 99ace12e..4c5f2a20 100644 --- a/tools/segmatch.cc +++ b/tools/segmatch.cc @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -140,14 +141,21 @@ void andc_masks(vector &dst_mask, const vector &src_mask) int main(int argc, char **argv) { const char *outfile = nullptr; + int min_each = 0, min_total = 0; int opt; - while ((opt = getopt(argc, argv, "io:")) != -1) + while ((opt = getopt(argc, argv, "io:m:M:")) != -1) switch (opt) { case 'o': outfile = optarg; break; + case 'm': + min_each = atoi(optarg); + break; + case 'M': + min_total = atoi(optarg); + break; case 'i': mode_inv = true; break; @@ -163,6 +171,12 @@ help: fprintf(stderr, " -o \n"); fprintf(stderr, " set output file\n"); fprintf(stderr, "\n"); + fprintf(stderr, " -m \n"); + fprintf(stderr, " min number of set/cleared samples each\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " -M \n"); + fprintf(stderr, " min number of set/cleared samples total\n"); + fprintf(stderr, "\n"); fprintf(stderr, " -i\n"); fprintf(stderr, " add inverted tags\n"); fprintf(stderr, "\n"); @@ -200,10 +214,12 @@ help: int max_candidates = 0; float avg_candidates = 0; + std::vector out_lines; + for (int tag_idx = 0; tag_idx < num_tags; tag_idx++) { vector mask(num_bits, true); - bool got1 = false, got0 = false; + int count1 = 0, count0 = 0; for (auto &segdat : segdata) { @@ -214,36 +230,54 @@ help: assert(!tag1 || !tag0); if (tag1) { - got1 = true; + count1++; and_masks(mask, segdata_bits(sd)); continue; } if (tag0) { - got0 = true; + count0++; andc_masks(mask, segdata_bits(sd)); continue; } } - assert(got1 || got0); + assert(count1 || count0); - fprintf(f, "%s", tag_ids_r.at(tag_idx).c_str()); + std::string out_line = tag_ids_r.at(tag_idx); - if (!got1) { - fprintf(f, " \n"); + if (count1 < min_each) { + char buffer[64]; + snprintf(buffer, 64, " ", count1); + out_line += buffer; + } + + if (count0 < min_each) { + char buffer[64]; + snprintf(buffer, 64, " ", count0); + out_line += buffer; + } + + if (count1 + count0 < min_total) { + char buffer[64]; + snprintf(buffer, 64, " ", count1, count0); + out_line += buffer; + } + + if (!count1) { + out_lines.push_back(out_line + " "); cnt_const0 += 1; continue; } - if (!got0) { - fprintf(f, " "); + if (!count0) { + out_line += " "; cnt_const1 += 1; } int num_candidates = std::accumulate(mask.begin(), mask.end(), 0); - if (got0) { + if (count0) { min_candidates = std::min(min_candidates, num_candidates); max_candidates = std::max(max_candidates, num_candidates); avg_candidates += num_candidates; @@ -251,15 +285,27 @@ help: } if (0 < num_candidates && num_candidates <= 4) { + std::vector out_tags; for (int bit_idx = 0; bit_idx < num_bits; bit_idx++) if (mask.at(bit_idx)) - fprintf(f, " %s", bit_ids_r.at(bit_idx).c_str()); - fprintf(f, "\n"); + out_tags.push_back(bit_ids_r.at(bit_idx)); + std::sort(out_tags.begin(), out_tags.end()); + for (auto &tag : out_tags) + out_line += " " + tag; } else { - fprintf(f, " <%d candidates>\n", num_candidates); + char buffer[64]; + snprintf(buffer, 64, " <%d candidates>", num_candidates); + out_line += buffer; } + + out_lines.push_back(out_line); } + std::sort(out_lines.begin(), out_lines.end()); + + for (auto &line : out_lines) + fprintf(f, "%s\n", line.c_str()); + if (cnt_candidates) avg_candidates /= cnt_candidates; diff --git a/utils/mergedb.sh b/utils/mergedb.sh index 37f5b0fa..0835073a 100644 --- a/utils/mergedb.sh +++ b/utils/mergedb.sh @@ -4,5 +4,5 @@ test $# = 2 test -e "$1" touch "$2" tmp=`mktemp -p .` -sort -u "$1" "$2" > "$tmp" +sort -u "$1" "$2" | grep -v '<.*>' > "$tmp" mv "$tmp" "$2"