Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Support/Regex.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- Regex.cpp - Regular Expression matcher implementation -------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements a POSIX regular expression matcher.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "llvm/Support/Regex.h"
14
#include "llvm/ADT/SmallVector.h"
15
#include "llvm/ADT/StringRef.h"
16
#include "llvm/ADT/Twine.h"
17
#include <string>
18
19
// Important: this comes last because it defines "_REGEX_H_". At least on
20
// Darwin, if included before any header that (transitively) includes
21
// xlocale.h, this will cause trouble, because of missing regex-related types.
22
#include "regex_impl.h"
23
24
using namespace llvm;
25
26
2
/// Default constructor: the object holds no compiled pattern (preg is null)
/// and records REG_BADPAT as its compile status.
Regex::Regex()
    : preg(nullptr),
      error(REG_BADPAT) {
}
27
28
689k
/// Compile \p regex into a freshly allocated llvm_regex.
///
/// \param regex pattern text; its extent is communicated through re_endp, so
///        it is handed to llvm_regcomp as a (pointer, end) pair.
/// \param Flags bitmask of Regex flags: IgnoreCase selects REG_ICASE, Newline
///        selects REG_NEWLINE, and REG_EXTENDED is used unless BasicRegex is
///        set.
///
/// The result code of llvm_regcomp is stored in `error`.
Regex::Regex(StringRef regex, unsigned Flags) {
  preg = new llvm_regex();
  // REG_PEND is always passed together with re_endp (per the BSD regcomp
  // interface, the pattern then ends at re_endp rather than at a NUL byte).
  preg->re_endp = regex.end();

  unsigned CompileFlags = REG_PEND;
  if (Flags & IgnoreCase)
    CompileFlags |= REG_ICASE;
  if (Flags & Newline)
    CompileFlags |= REG_NEWLINE;
  if ((Flags & BasicRegex) == 0)
    CompileFlags |= REG_EXTENDED;

  error = llvm_regcomp(preg, regex.data(), CompileFlags);
}
40
41
17.0k
Regex::Regex(Regex &&regex) {
42
17.0k
  preg = regex.preg;
43
17.0k
  error = regex.error;
44
17.0k
  regex.preg = nullptr;
45
17.0k
  regex.error = REG_BADPAT;
46
17.0k
}
47
48
706k
/// Release the compiled pattern, if this object still owns one.
Regex::~Regex() {
  // Default-constructed and moved-from objects hold a null preg.
  if (!preg)
    return;

  llvm_regfree(preg);
  delete preg;
}
54
55
3.21k
bool Regex::isValid(std::string &Error) const {
56
3.21k
  if (!error)
57
3.19k
    return true;
58
13
59
13
  size_t len = llvm_regerror(error, preg, nullptr, 0);
60
13
61
13
  Error.resize(len - 1);
62
13
  llvm_regerror(error, preg, &Error[0], len);
63
13
  return false;
64
13
}
65
66
/// getNumMatches - In a valid regex, return the number of parenthesized
67
/// matches it contains.
68
247
unsigned Regex::getNumMatches() const {
69
247
  return preg->re_nsub;
70
247
}
71
72
36.1M
/// Match this regex against \p String.
///
/// \param String  text to search. Its extent is passed to llvm_regexec through
///        pm[0] together with REG_STARTEND. A default-constructed (null)
///        StringRef is searched as an empty string.
/// \param Matches optional output. When non-null and a match is found, it is
///        cleared and filled with re_nsub+1 entries taken from the engine's
///        match array; an entry whose start offset is -1 (group did not
///        match) is stored as an empty StringRef(). It is left untouched when
///        the function returns false.
/// \returns true on a match. Returns false when the stored status is
///          non-zero, when nothing matched, or when llvm_regexec reported an
///          error; in the last case that code is stored in `error`.
bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){
  if (error)
    return false;

  unsigned nmatch = Matches ? preg->re_nsub+1 : 0;

  // Never hand a null buffer to the matcher: a default-constructed StringRef
  // has a null data() pointer, and the engine would do pointer arithmetic on
  // it. Search an empty string instead. Results below are still expressed
  // relative to the caller's String so they stay comparable with its
  // begin()/end().
  const char *Subject = String.data() ? String.data() : "";

  // pmatch needs to have at least one element.
  SmallVector<llvm_regmatch_t, 8> pm;
  pm.resize(nmatch > 0 ? nmatch : 1);
  pm[0].rm_so = 0;
  pm[0].rm_eo = String.size();

  int rc = llvm_regexec(preg, Subject, nmatch, pm.data(), REG_STARTEND);

  if (rc == REG_NOMATCH)
    return false;
  if (rc != 0) {
    // regexec can fail due to invalid pattern or running out of memory.
    error = rc;
    return false;
  }

  // There was a match.

  if (Matches) { // match position requested
    Matches->clear();

    for (unsigned i = 0; i != nmatch; ++i) {
      if (pm[i].rm_so == -1) {
        // this group didn't match
        Matches->push_back(StringRef());
        continue;
      }
      assert(pm[i].rm_eo >= pm[i].rm_so);
      // For a null String the only possible offsets are 0, so this yields a
      // null, zero-length StringRef exactly as before.
      Matches->push_back(StringRef(String.data()+pm[i].rm_so,
                                   pm[i].rm_eo-pm[i].rm_so));
    }
  }

  return true;
}
113
114
std::string Regex::sub(StringRef Repl, StringRef String,
115
73
                       std::string *Error) {
116
73
  SmallVector<StringRef, 8> Matches;
117
73
118
73
  // Reset error, if given.
119
73
  if (Error && 
!Error->empty()72
)
*Error = ""1
;
120
73
121
73
  // Return the input if there was no match.
122
73
  if (!match(String, &Matches))
123
52
    return String;
124
21
125
21
  // Otherwise splice in the replacement string, starting with the prefix before
126
21
  // the match.
127
21
  std::string Res(String.begin(), Matches[0].begin());
128
21
129
21
  // Then the replacement string, honoring possible substitutions.
130
40
  while (!Repl.empty()) {
131
24
    // Skip to the next escape.
132
24
    std::pair<StringRef, StringRef> Split = Repl.split('\\');
133
24
134
24
    // Add the skipped substring.
135
24
    Res += Split.first;
136
24
137
24
    // Check for terminimation and trailing backslash.
138
24
    if (Split.second.empty()) {
139
5
      if (Repl.size() != Split.first.size() &&
140
5
          
Error1
&&
Error->empty()1
)
141
1
        *Error = "replacement string contained trailing backslash";
142
5
      break;
143
5
    }
144
19
145
19
    // Otherwise update the replacement string and interpret escapes.
146
19
    Repl = Split.second;
147
19
148
19
    // FIXME: We should have a StringExtras function for mapping C99 escapes.
149
19
    switch (Repl[0]) {
150
19
      // Treat all unrecognized characters as self-quoting.
151
19
    default:
152
2
      Res += Repl[0];
153
2
      Repl = Repl.substr(1);
154
2
      break;
155
19
156
19
      // Single character escapes.
157
19
    case 't':
158
1
      Res += '\t';
159
1
      Repl = Repl.substr(1);
160
1
      break;
161
19
    case 'n':
162
1
      Res += '\n';
163
1
      Repl = Repl.substr(1);
164
1
      break;
165
19
166
19
      // Decimal escapes are backreferences.
167
19
    
case '0': 15
case '1': 15
case '2': 15
case '3': 15
case '4':
168
15
    case '5': case '6': case '7': case '8': case '9': {
169
15
      // Extract the backreference number.
170
15
      StringRef Ref = Repl.slice(0, Repl.find_first_not_of("0123456789"));
171
15
      Repl = Repl.substr(Ref.size());
172
15
173
15
      unsigned RefValue;
174
15
      if (!Ref.getAsInteger(10, RefValue) &&
175
15
          RefValue < Matches.size())
176
14
        Res += Matches[RefValue];
177
1
      else if (Error && Error->empty())
178
1
        *Error = ("invalid backreference string '" + Twine(Ref) + "'").str();
179
15
      break;
180
15
    }
181
19
    }
182
19
  }
183
21
184
21
  // And finally the suffix.
185
21
  Res += StringRef(Matches[0].end(), String.end() - Matches[0].end());
186
21
187
21
  return Res;
188
21
}
189
190
// These are the special characters matched in functions like "p_ere_exp".
191
// Consulted by Regex::isLiteralERE (via find_first_of) and Regex::escape (via
// strchr) to decide whether a character is special.
static const char RegexMetachars[] = "()^$|*+?.[]\\{}";
192
193
809
/// Return true when \p Str contains none of the characters listed in
/// RegexMetachars.
bool Regex::isLiteralERE(StringRef Str) {
  // The metacharacter list was derived from our regex implementation in
  // regcomp.c and double checked against the POSIX extended regular
  // expression specification.
  const size_t FirstMeta = Str.find_first_of(RegexMetachars);
  return FirstMeta == StringRef::npos;
}
199
200
3.58k
/// Return a copy of \p String in which every character found by strchr in
/// RegexMetachars is preceded by a backslash.
std::string Regex::escape(StringRef String) {
  std::string Escaped;
  for (const char C : String) {
    // Note: strchr also matches the terminating NUL of RegexMetachars, so an
    // embedded '\0' in the input is prefixed with a backslash as well.
    if (strchr(RegexMetachars, C))
      Escaped += '\\';
    Escaped += C;
  }

  return Escaped;
}