Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Support/TrigramIndex.cpp
Line
Count
Source
1
//===-- TrigramIndex.cpp - a heuristic for SpecialCaseList ----------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// TrigramIndex implements a heuristic for SpecialCaseList that allows to
10
// filter out ~99% incoming queries when all regular expressions in the
11
// SpecialCaseList are simple wildcards with '*' and '.'. If rules are more
12
// complicated, the check is defeated and it will always pass the queries to a
13
// full regex.
14
//
15
//===----------------------------------------------------------------------===//
16
17
#include "llvm/Support/TrigramIndex.h"
18
#include "llvm/ADT/SmallVector.h"
19
20
#include <set>
21
#include <string>
22
#include <unordered_map>
23
24
using namespace llvm;
25
26
static const char RegexAdvancedMetachars[] = "()^$|+?[]\\{}";
27
28
9.19k
static bool isAdvancedMetachar(unsigned Char) {
29
9.19k
  return strchr(RegexAdvancedMetachars, Char) != nullptr;
30
9.19k
}
31
32
716
void TrigramIndex::insert(std::string Regex) {
33
716
  if (Defeated) 
return2
;
34
714
  std::set<unsigned> Was;
35
714
  unsigned Cnt = 0;
36
714
  unsigned Tri = 0;
37
714
  unsigned Len = 0;
38
714
  bool Escaped = false;
39
9.22k
  for (unsigned Char : Regex) {
40
9.22k
    if (!Escaped) {
41
9.20k
      // Regular expressions allow escaping symbols by preceding it with '\'.
42
9.20k
      if (Char == '\\') {
43
12
        Escaped = true;
44
12
        continue;
45
12
      }
46
9.19k
      if (isAdvancedMetachar(Char)) {
47
6
        // This is a more complicated regex than we can handle here.
48
6
        Defeated = true;
49
6
        return;
50
6
      }
51
9.19k
      if (Char == '.' || 
Char == '*'9.12k
) {
52
1.21k
        Tri = 0;
53
1.21k
        Len = 0;
54
1.21k
        continue;
55
1.21k
      }
56
7.98k
    }
57
7.98k
    if (Escaped && 
Char >= '1'12
&&
Char <= '9'5
) {
58
2
      Defeated = true;
59
2
      return;
60
2
    }
61
7.98k
    // We have already handled escaping and can reset the flag.
62
7.98k
    Escaped = false;
63
7.98k
    Tri = ((Tri << 8) + Char) & 0xFFFFFF;
64
7.98k
    Len++;
65
7.98k
    if (Len < 3)
66
1.44k
      continue;
67
6.54k
    // We don't want the index to grow too much for the popular trigrams,
68
6.54k
    // as they are weak signals. It's ok to still require them for the
69
6.54k
    // rules we have already processed. It's just a small additional
70
6.54k
    // computational cost.
71
6.54k
    if (Index[Tri].size() >= 4)
72
9
      continue;
73
6.53k
    Cnt++;
74
6.53k
    if (!Was.count(Tri)) {
75
6.48k
      // Adding the current rule to the index.
76
6.48k
      Index[Tri].push_back(Counts.size());
77
6.48k
      Was.insert(Tri);
78
6.48k
    }
79
6.53k
  }
80
714
  
if (706
!Cnt706
) {
81
207
    // This rule does not have remarkable trigrams to rely on.
82
207
    // We have to always call the full regex chain.
83
207
    Defeated = true;
84
207
    return;
85
207
  }
86
499
  Counts.push_back(Cnt);
87
499
}
88
89
3.06k
bool TrigramIndex::isDefinitelyOut(StringRef Query) const {
90
3.06k
  if (Defeated)
91
2.02k
    return false;
92
1.03k
  std::vector<unsigned> CurCounts(Counts.size());
93
1.03k
  unsigned Tri = 0;
94
25.5k
  for (size_t I = 0; I < Query.size(); 
I++24.4k
) {
95
24.6k
    Tri = ((Tri << 8) + Query[I]) & 0xFFFFFF;
96
24.6k
    if (I < 2)
97
2.05k
      continue;
98
22.6k
    const auto &II = Index.find(Tri);
99
22.6k
    if (II == Index.end())
100
14.1k
      continue;
101
8.54k
    
for (size_t J : II->second)8.44k
{
102
8.54k
      CurCounts[J]++;
103
8.54k
      // If we have reached a desired limit, we have to look at the query
104
8.54k
      // more closely by running a full regex.
105
8.54k
      if (CurCounts[J] >= Counts[J])
106
213
        return false;
107
8.54k
    }
108
8.44k
  }
109
1.03k
  
return true821
;
110
1.03k
}