Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/include/llvm/Support/TrigramIndex.h
Line
Count
Source (jump to first uncovered line)
1
//===-- TrigramIndex.h - a heuristic for SpecialCaseList --------*- C++ -*-===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//===----------------------------------------------------------------------===//
8
//
9
// TrigramIndex implements a heuristic for SpecialCaseList that allows to
10
// filter out ~99% incoming queries when all regular expressions in the
11
// SpecialCaseList are simple wildcards with '*' and '.'. If rules are more
12
// complicated, the check is defeated and it will always pass the queries to a
13
// full regex.
14
//
15
// The basic idea is that in order for a wildcard to match a query, the query
16
// needs to have all trigrams which occur in the wildcard. We create a trigram
17
// index (trigram -> list of rules with it) and then count trigrams in the query
18
// for each rule. If the count for one of the rules reaches the expected value,
19
// the check passes the query to a regex. If none of the rules got enough
20
// trigrams, the check tells that the query is definitely not matched by any
21
// of the rules, and no regex matching is needed.
22
// A similar idea was used in Google Code Search as described in the blog post:
23
// https://swtch.com/~rsc/regexp/regexp4.html
24
//
25
//===----------------------------------------------------------------------===//
26
27
#ifndef LLVM_SUPPORT_TRIGRAMINDEX_H
28
#define LLVM_SUPPORT_TRIGRAMINDEX_H
29
30
#include "llvm/ADT/SmallVector.h"
31
#include "llvm/ADT/StringMap.h"
32
33
#include <string>
34
#include <unordered_map>
35
#include <vector>
36
37
namespace llvm {
38
class StringRef;
39
40
class TrigramIndex {
41
 public:
42
  /// Inserts a new Regex into the index.
43
  void insert(std::string Regex);
44
45
  /// Returns true, if special case list definitely does not have a line
46
  /// that matches the query. Returns false, if it's not sure.
47
  bool isDefinitelyOut(StringRef Query) const;
48
49
  /// Returned true, iff the heuristic is defeated and not useful.
50
  /// In this case isDefinitelyOut always returns false.
51
0
  bool isDefeated() { return Defeated; }
52
 private:
53
  // If true, the rules are too complicated for the check to work, and full
54
  // regex matching is needed for every rule.
55
  bool Defeated = false;
56
  // The minimum number of trigrams which should match for a rule to have a
57
  // chance to match the query. The number of elements equals the number of
58
  // regex rules in the SpecialCaseList.
59
  std::vector<unsigned> Counts;
60
  // Index holds a list of rules indices for each trigram. The same indices
61
  // are used in Counts to store per-rule limits.
62
  // If a trigram is too common (>4 rules with it), we stop tracking it,
63
  // which increases the probability for a need to match using regex, but
64
  // decreases the costs in the regular case.
65
  std::unordered_map<unsigned, SmallVector<size_t, 4>> Index{256};
66
};
67
68
}  // namespace llvm
69
70
#endif  // LLVM_SUPPORT_TRIGRAMINDEX_H