Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
Line
Count
Source (jump to first uncovered line)
1
//===----- ARMCodeGenPrepare.cpp ------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file
10
/// This pass inserts intrinsics to handle small types that would otherwise be
11
/// promoted during legalization. Here we can manually promote types or insert
12
/// intrinsics which can handle narrow types that aren't supported by the
13
/// register classes.
14
//
15
//===----------------------------------------------------------------------===//
16
17
#include "ARM.h"
18
#include "ARMSubtarget.h"
19
#include "ARMTargetMachine.h"
20
#include "llvm/ADT/StringRef.h"
21
#include "llvm/CodeGen/Passes.h"
22
#include "llvm/CodeGen/TargetPassConfig.h"
23
#include "llvm/IR/Attributes.h"
24
#include "llvm/IR/BasicBlock.h"
25
#include "llvm/IR/IRBuilder.h"
26
#include "llvm/IR/Constants.h"
27
#include "llvm/IR/InstrTypes.h"
28
#include "llvm/IR/Instruction.h"
29
#include "llvm/IR/Instructions.h"
30
#include "llvm/IR/IntrinsicInst.h"
31
#include "llvm/IR/Intrinsics.h"
32
#include "llvm/IR/Type.h"
33
#include "llvm/IR/Value.h"
34
#include "llvm/IR/Verifier.h"
35
#include "llvm/Pass.h"
36
#include "llvm/Support/Casting.h"
37
#include "llvm/Support/CommandLine.h"
38
39
#define DEBUG_TYPE "arm-codegenprepare"
40
41
using namespace llvm;
42
43
// Hidden command-line switch "arm-disable-cgp"; defaults to true.
// NOTE(review): presumably consulted by runOnFunction to skip the pass -
// the use site is outside this view, confirm there.
static cl::opt<bool>
DisableCGP("arm-disable-cgp", cl::Hidden, cl::init(true),
           cl::desc("Disable ARM specific CodeGenPrepare pass"));

// Hidden command-line switch "arm-enable-scalar-dsp"; defaults to false.
// IRPromoter::PromoteTree asserts that it is set before emitting DSP
// intrinsics.
static cl::opt<bool>
EnableDSP("arm-enable-scalar-dsp", cl::Hidden, cl::init(false),
          cl::desc("Use DSP instructions for scalar operations"));

// Hidden command-line switch "arm-enable-scalar-dsp-imms"; defaults to false.
// The description is built from adjacent string literals rather than a
// backslash line-continuation: a continuation inside a string literal keeps
// the next line's leading whitespace as part of the help text.
static cl::opt<bool>
EnableDSPWithImms("arm-enable-scalar-dsp-imms", cl::Hidden, cl::init(false),
                   cl::desc("Use DSP instructions for scalar operations "
                            "with immediate operands"));
55
56
// The goal of this pass is to enable more efficient code generation for
57
// operations on narrow types (i.e. types with < 32-bits) and this is a
58
// motivating IR code example:
59
//
60
//   define hidden i32 @cmp(i8 zeroext) {
61
//     %2 = add i8 %0, -49
62
//     %3 = icmp ult i8 %2, 3
63
//     ..
64
//   }
65
//
66
// The issue here is that i8 is type-legalized to i32 because i8 is not a
67
// legal type. Thus, arithmetic is done in integer-precision, but then the
68
// byte value is masked out as follows:
69
//
70
//   t19: i32 = add t4, Constant:i32<-49>
71
//     t24: i32 = and t19, Constant:i32<255>
72
//
73
// Consequently, we generate code like this:
74
//
75
//   subs  r0, #49
76
//   uxtb  r1, r0
77
//   cmp r1, #3
78
//
79
// This shows that masking out the byte value results in generation of
80
// the UXTB instruction. This is not optimal as r0 already contains the byte
81
// value we need, and so instead we can just generate:
82
//
83
//   sub.w r1, r0, #49
84
//   cmp r1, #3
85
//
86
// We achieve this by type promoting the IR to i32 like so for this example:
87
//
88
//   define i32 @cmp(i8 zeroext %c) {
89
//     %0 = zext i8 %c to i32
90
//     %c.off = add i32 %0, -49
91
//     %1 = icmp ult i32 %c.off, 3
92
//     ..
93
//   }
94
//
95
// For this to be valid and legal, we need to prove that the i32 add is
96
// producing the same value as the i8 addition, and that e.g. no overflow
97
// happens.
98
//
99
// A brief sketch of the algorithm and some terminology.
100
// We pattern match interesting IR patterns:
101
// - which have "sources": instructions producing narrow values (i8, i16), and
102
// - they have "sinks": instructions consuming these narrow values.
103
//
104
// We collect all instructions connecting sources and sinks in a worklist, so
105
// that we can mutate these instructions and perform type promotion when it is
106
// legal to do so.
107
108
namespace {
109
class IRPromoter {
110
  SmallPtrSet<Value*, 8> NewInsts;
111
  SmallPtrSet<Instruction*, 4> InstsToRemove;
112
  DenseMap<Value*, SmallVector<Type*, 4>> TruncTysMap;
113
  SmallPtrSet<Value*, 8> Promoted;
114
  Module *M = nullptr;
115
  LLVMContext &Ctx;
116
  // The type we promote to: always i32
117
  IntegerType *ExtTy = nullptr;
118
  // The type of the value that the search began from, either i8 or i16.
119
  // This defines the max range of the values that we allow in the promoted
120
  // tree.
121
  IntegerType *OrigTy = nullptr;
122
  SetVector<Value*> *Visited;
123
  SmallPtrSetImpl<Value*> *Sources;
124
  SmallPtrSetImpl<Instruction*> *Sinks;
125
  SmallPtrSetImpl<Instruction*> *SafeToPromote;
126
  SmallPtrSetImpl<Instruction*> *SafeWrap;
127
128
  void ReplaceAllUsersOfWith(Value *From, Value *To);
129
  void PrepareWrappingAdds(void);
130
  void ExtendSources(void);
131
  void ConvertTruncs(void);
132
  void PromoteTree(void);
133
  void TruncateSinks(void);
134
  void Cleanup(void);
135
136
public:
137
  IRPromoter(Module *M) : M(M), Ctx(M->getContext()),
138
4.90k
                          ExtTy(Type::getInt32Ty(Ctx)) { }
139
140
141
  void Mutate(Type *OrigTy,
142
              SetVector<Value*> &Visited,
143
              SmallPtrSetImpl<Value*> &Sources,
144
              SmallPtrSetImpl<Instruction*> &Sinks,
145
              SmallPtrSetImpl<Instruction*> &SafeToPromote,
146
              SmallPtrSetImpl<Instruction*> &SafeWrap);
147
};
148
149
class ARMCodeGenPrepare : public FunctionPass {
150
  const ARMSubtarget *ST = nullptr;
151
  IRPromoter *Promoter = nullptr;
152
  std::set<Value*> AllVisited;
153
  SmallPtrSet<Instruction*, 8> SafeToPromote;
154
  SmallPtrSet<Instruction*, 4> SafeWrap;
155
156
  bool isSafeWrap(Instruction *I);
157
  bool isSupportedValue(Value *V);
158
  bool isLegalToPromote(Value *V);
159
  bool TryToPromote(Value *V);
160
161
public:
162
  static char ID;
163
  static unsigned TypeSize;
164
  Type *OrigTy = nullptr;
165
166
4.91k
  ARMCodeGenPrepare() : FunctionPass(ID) {}
167
168
4.90k
  void getAnalysisUsage(AnalysisUsage &AU) const override {
169
4.90k
    AU.addRequired<TargetPassConfig>();
170
4.90k
  }
171
172
25.3k
  StringRef getPassName() const override { return "ARM IR optimizations"; }
173
174
  bool doInitialization(Module &M) override;
175
  bool runOnFunction(Function &F) override;
176
  bool doFinalization(Module &M) override;
177
};
178
179
}
180
181
2.83k
/// Return true if \p V is an argument carrying the signext attribute, or an
/// instruction whose opcode is one of ashr, sdiv, srem, sext or sitofp.
/// Any other value (including non-instruction, non-argument values) yields
/// false.
static bool GenerateSignBits(Value *V) {
  if (auto *Arg = dyn_cast<Argument>(V))
    return Arg->hasSExtAttr();

  auto *Inst = dyn_cast<Instruction>(V);
  if (!Inst)
    return false;

  switch (Inst->getOpcode()) {
  case Instruction::AShr:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::SExt:
  case Instruction::SIToFP:
    return true;
  default:
    return false;
  }
}
193
194
811
/// Return true if the scalar bit width of \p V's type is exactly
/// ARMCodeGenPrepare::TypeSize.
static bool EqualTypeSize(Value *V) {
  const unsigned Bits = V->getType()->getScalarSizeInBits();
  return Bits == ARMCodeGenPrepare::TypeSize;
}
197
198
2.03k
/// Return true if the scalar bit width of \p V's type does not exceed
/// ARMCodeGenPrepare::TypeSize.
static bool LessOrEqualTypeSize(Value *V) {
  const unsigned Bits = V->getType()->getScalarSizeInBits();
  return Bits <= ARMCodeGenPrepare::TypeSize;
}
201
202
559
/// Return true if the scalar bit width of \p V's type is strictly larger
/// than ARMCodeGenPrepare::TypeSize.
static bool GreaterThanTypeSize(Value *V) {
  const unsigned Bits = V->getType()->getScalarSizeInBits();
  return Bits > ARMCodeGenPrepare::TypeSize;
}
205
206
1.59k
/// Return true if the scalar bit width of \p V's type is strictly smaller
/// than ARMCodeGenPrepare::TypeSize.
static bool LessThanTypeSize(Value *V) {
  const unsigned Bits = V->getType()->getScalarSizeInBits();
  return Bits < ARMCodeGenPrepare::TypeSize;
}
209
210
/// Some instructions can use 8- and 16-bit operands, and we don't need to
211
/// promote anything larger. We disallow booleans to make life easier when
212
/// dealing with icmps but allow any other integer that is <= 16 bits. Void
213
/// types are accepted so we can handle switches.
214
1.93k
static bool isSupportedType(Value *V) {
215
1.93k
  Type *Ty = V->getType();
216
1.93k
217
1.93k
  // Allow voids and pointers, these won't be promoted.
218
1.93k
  if (Ty->isVoidTy() || 
Ty->isPointerTy()1.88k
)
219
47
    return true;
220
1.88k
221
1.88k
  if (auto *Ld = dyn_cast<LoadInst>(V))
222
169
    Ty = cast<PointerType>(Ld->getPointerOperandType())->getElementType();
223
1.88k
224
1.88k
  if (!isa<IntegerType>(Ty) ||
225
1.88k
      cast<IntegerType>(V->getType())->getBitWidth() == 1)
226
14
    return false;
227
1.87k
228
1.87k
  return LessOrEqualTypeSize(V);
229
1.87k
}
230
231
/// Return true if the given value is a source in the use-def chain, producing
232
/// a narrow 'TypeSize' value. These values will be zext to start the promotion
233
/// of the tree to i32. We guarantee that these won't populate the upper bits
234
/// of the register. ZExt on the loads will be free, and the same for call
235
/// return values because we only accept ones that guarantee a zeroext ret val.
236
/// Many arguments will have the zeroext attribute too, so those would be free
237
/// too.
238
9.18k
static bool isSource(Value *V) {
239
9.18k
  if (!isa<IntegerType>(V->getType()))
240
198
    return false;
241
8.98k
242
8.98k
  // TODO Allow zext to be sources.
243
8.98k
  if (isa<Argument>(V))
244
1.26k
    return true;
245
7.71k
  else if (isa<LoadInst>(V))
246
507
    return true;
247
7.21k
  else if (isa<BitCastInst>(V))
248
16
    return true;
249
7.19k
  else if (auto *Call = dyn_cast<CallInst>(V))
250
28
    return Call->hasRetAttr(Attribute::AttrKind::ZExt);
251
7.16k
  else if (auto *Trunc = dyn_cast<TruncInst>(V))
252
478
    return EqualTypeSize(Trunc);
253
6.68k
  return false;
254
6.68k
}
255
256
/// Return true if V will require any promoted values to be truncated for the
257
/// the IR to remain valid. We can't mutate the value type of these
258
/// instructions.
259
7.39k
static bool isSink(Value *V) {
260
7.39k
  // TODO The truncate also isn't actually necessary because we would already
261
7.39k
  // proved that the data value is kept within the range of the original data
262
7.39k
  // type.
263
7.39k
264
7.39k
  // Sinks are:
265
7.39k
  // - points where the value in the register is being observed, such as an
266
7.39k
  //   icmp, switch or store.
267
7.39k
  // - points where value types have to match, such as calls and returns.
268
7.39k
  // - zext are included to ease the transformation and are generally removed
269
7.39k
  //   later on.
270
7.39k
  if (auto *Store = dyn_cast<StoreInst>(V))
271
68
    return LessOrEqualTypeSize(Store->getValueOperand());
272
7.32k
  if (auto *Return = dyn_cast<ReturnInst>(V))
273
94
    return LessOrEqualTypeSize(Return->getReturnValue());
274
7.23k
  if (auto *ZExt = dyn_cast<ZExtInst>(V))
275
559
    return GreaterThanTypeSize(ZExt);
276
6.67k
  if (auto *Switch = dyn_cast<SwitchInst>(V))
277
20
    return LessThanTypeSize(Switch->getCondition());
278
6.65k
  if (auto *ICmp = dyn_cast<ICmpInst>(V))
279
1.59k
    return ICmp->isSigned() || 
LessThanTypeSize(ICmp->getOperand(0))1.57k
;
280
5.06k
281
5.06k
  return isa<CallInst>(V);
282
5.06k
}
283
284
/// Return whether this instruction can safely wrap.
285
85
bool ARMCodeGenPrepare::isSafeWrap(Instruction *I) {
286
85
  // We can support a, potentially, wrapping instruction (I) if:
287
85
  // - It is only used by an unsigned icmp.
288
85
  // - The icmp uses a constant.
289
85
  // - The wrapping value (I) is decreasing, i.e would underflow - wrapping
290
85
  //   around zero to become a larger number than before.
291
85
  // - The wrapping instruction (I) also uses a constant.
292
85
  //
293
85
  // We can then use the two constants to calculate whether the result would
294
85
  // wrap in respect to itself in the original bitwidth. If it doesn't wrap,
295
85
  // just underflows the range, the icmp would give the same result whether the
296
85
  // result has been truncated or not. We calculate this by:
297
85
  // - Zero extending both constants, if needed, to 32-bits.
298
85
  // - Take the absolute value of I's constant, adding this to the icmp const.
299
85
  // - Check that this value is not out of range for small type. If it is, it
300
85
  //   means that it has underflowed enough to wrap around the icmp constant.
301
85
  //
302
85
  // For example:
303
85
  //
304
85
  // %sub = sub i8 %a, 2
305
85
  // %cmp = icmp ule i8 %sub, 254
306
85
  //
307
85
  // If %a = 0, %sub = -2 == FE == 254
308
85
  // But if this is evalulated as a i32
309
85
  // %sub = -2 == FF FF FF FE == 4294967294
310
85
  // So the unsigned compares (i8 and i32) would not yield the same result.
311
85
  //
312
85
  // Another way to look at it is:
313
85
  // %a - 2 <= 254
314
85
  // %a + 2 <= 254 + 2
315
85
  // %a <= 256
316
85
  // And we can't represent 256 in the i8 format, so we don't support it.
317
85
  //
318
85
  // Whereas:
319
85
  //
320
85
  // %sub i8 %a, 1
321
85
  // %cmp = icmp ule i8 %sub, 254
322
85
  //
323
85
  // If %a = 0, %sub = -1 == FF == 255
324
85
  // As i32:
325
85
  // %sub = -1 == FF FF FF FF == 4294967295
326
85
  //
327
85
  // In this case, the unsigned compare results would be the same and this
328
85
  // would also be true for ult, uge and ugt:
329
85
  // - (255 < 254) == (0xFFFFFFFF < 254) == false
330
85
  // - (255 <= 254) == (0xFFFFFFFF <= 254) == false
331
85
  // - (255 > 254) == (0xFFFFFFFF > 254) == true
332
85
  // - (255 >= 254) == (0xFFFFFFFF >= 254) == true
333
85
  //
334
85
  // To demonstrate why we can't handle increasing values:
335
85
  //
336
85
  // %add = add i8 %a, 2
337
85
  // %cmp = icmp ult i8 %add, 127
338
85
  //
339
85
  // If %a = 254, %add = 256 == (i8 1)
340
85
  // As i32:
341
85
  // %add = 256
342
85
  //
343
85
  // (1 < 127) != (256 < 127)
344
85
345
85
  unsigned Opc = I->getOpcode();
346
85
  if (Opc != Instruction::Add && 
Opc != Instruction::Sub37
)
347
2
    return false;
348
83
349
83
  if (!I->hasOneUse() ||
350
83
      
!isa<ICmpInst>(*I->user_begin())68
||
351
83
      
!isa<ConstantInt>(I->getOperand(1))51
)
352
47
    return false;
353
36
354
36
  ConstantInt *OverflowConst = cast<ConstantInt>(I->getOperand(1));
355
36
  bool NegImm = OverflowConst->isNegative();
356
36
  bool IsDecreasing = ((Opc == Instruction::Sub) && 
!NegImm10
) ||
357
36
                       
(27
(Opc == Instruction::Add)27
&&
NegImm26
);
358
36
  if (!IsDecreasing)
359
8
    return false;
360
28
361
28
  // Don't support an icmp that deals with sign bits.
362
28
  auto *CI = cast<ICmpInst>(*I->user_begin());
363
28
  if (CI->isSigned() || CI->isEquality())
364
0
    return false;
365
28
366
28
  ConstantInt *ICmpConst = nullptr;
367
28
  if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0)))
368
0
    ICmpConst = Const;
369
28
  else if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1)))
370
25
    ICmpConst = Const;
371
3
  else
372
3
    return false;
373
25
374
25
  // Now check that the result can't wrap on itself.
375
25
  APInt Total = ICmpConst->getValue().getBitWidth() < 32 ?
376
25
    ICmpConst->getValue().zext(32) : 
ICmpConst->getValue()0
;
377
25
378
25
  Total += OverflowConst->getValue().getBitWidth() < 32 ?
379
25
    OverflowConst->getValue().abs().zext(32) : 
OverflowConst->getValue().abs()0
;
380
25
381
25
  APInt Max = APInt::getAllOnesValue(ARMCodeGenPrepare::TypeSize);
382
25
383
25
  if (Total.getBitWidth() > Max.getBitWidth()) {
384
25
    if (Total.ugt(Max.zext(Total.getBitWidth())))
385
6
      return false;
386
0
  } else if (Max.getBitWidth() > Total.getBitWidth()) {
387
0
    if (Total.zext(Max.getBitWidth()).ugt(Max))
388
0
      return false;
389
0
  } else if (Total.ugt(Max))
390
0
    return false;
391
19
392
19
  LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n");
393
19
  SafeWrap.insert(I);
394
19
  return true;
395
19
}
396
397
4.67k
/// Return true if the type of \p V should be mutated to the promoted type.
/// Only integer-typed values qualify. Sinks never do; sources always do;
/// any remaining value qualifies if it is an instruction other than an icmp.
static bool shouldPromote(Value *V) {
  if (!isa<IntegerType>(V->getType()))
    return false;

  if (isSink(V))
    return false;

  if (isSource(V))
    return true;

  return isa<Instruction>(V) && !isa<ICmpInst>(V);
}
413
414
/// Return whether we can safely mutate V's type to ExtTy without having to be
415
/// concerned with zero extending or truncation.
416
900
static bool isPromotedResultSafe(Value *V) {
417
900
  if (GenerateSignBits(V))
418
0
    return false;
419
900
420
900
  if (!isa<Instruction>(V))
421
0
    return true;
422
900
423
900
  if (!isa<OverflowingBinaryOperator>(V))
424
612
    return true;
425
288
426
288
  return cast<Instruction>(V)->hasNoUnsignedWrap();
427
288
}
428
429
/// Return the intrinsic for the instruction that can perform the same
430
/// operation but on a narrow type. This is using the parallel dsp intrinsics
431
/// on scalar values.
432
24
static Intrinsic::ID getNarrowIntrinsic(Instruction *I) {
433
24
  // Whether we use the signed or unsigned versions of these intrinsics
434
24
  // doesn't matter because we're not using the GE bits that they set in
435
24
  // the APSR.
436
24
  switch(I->getOpcode()) {
437
24
  default:
438
0
    break;
439
24
  case Instruction::Add:
440
12
    return ARMCodeGenPrepare::TypeSize == 16 ? 
Intrinsic::arm_uadd163
:
441
12
      
Intrinsic::arm_uadd89
;
442
24
  case Instruction::Sub:
443
12
    return ARMCodeGenPrepare::TypeSize == 16 ? 
Intrinsic::arm_usub163
:
444
12
      
Intrinsic::arm_usub89
;
445
0
  }
446
0
  llvm_unreachable("unhandled opcode for narrow intrinsic");
447
0
}
448
449
428
/// Redirect the users of \p From to \p To. A user that is identical to
/// \p To (as judged by Instruction::isIdenticalTo) is left untouched, which
/// keeps a freshly created replacement from being rewritten to use itself.
/// When no user had to be skipped and \p From is an instruction, it is
/// queued in InstsToRemove.
void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
  LLVM_DEBUG(dbgs() << "ARM CGP: Replacing " << *From << " with " << *To
             << "\n");

  Instruction *InstTo = dyn_cast<Instruction>(To);

  // Collect first, rewrite afterwards: rewriting changes From's use list.
  SmallVector<Instruction*, 4> Users;
  bool SkippedAny = false;
  for (Use &U : From->uses()) {
    auto *User = cast<Instruction>(U.getUser());
    if (InstTo && User->isIdenticalTo(InstTo))
      SkippedAny = true;
    else
      Users.push_back(User);
  }

  for (Instruction *User : Users)
    User->replaceUsesOfWith(From, To);

  if (SkippedAny)
    return;

  if (auto *Dead = dyn_cast<Instruction>(From))
    InstsToRemove.insert(Dead);
}
473
474
170
void IRPromoter::PrepareWrappingAdds() {
475
170
  LLVM_DEBUG(dbgs() << "ARM CGP: Prepare underflowing adds.\n");
476
170
  IRBuilder<> Builder{Ctx};
477
170
478
170
  // For adds that safely wrap and use a negative immediate as operand 1, we
479
170
  // create an equivalent instruction using a positive immediate.
480
170
  // That positive immediate can then be zext along with all the other
481
170
  // immediates later.
482
170
  for (auto *I : *SafeWrap) {
483
18
    if (I->getOpcode() != Instruction::Add)
484
4
      continue;
485
14
486
14
    LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n");
487
14
    assert((isa<ConstantInt>(I->getOperand(1)) &&
488
14
            cast<ConstantInt>(I->getOperand(1))->isNegative()) &&
489
14
           "Wrapping should have a negative immediate as the second operand");
490
14
491
14
    auto Const = cast<ConstantInt>(I->getOperand(1));
492
14
    auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs());
493
14
    Builder.SetInsertPoint(I);
494
14
    Value *NewVal = Builder.CreateSub(I->getOperand(0), NewConst);
495
14
    if (auto *NewInst = dyn_cast<Instruction>(NewVal)) {
496
14
      NewInst->copyIRFlags(I);
497
14
      NewInsts.insert(NewInst);
498
14
    }
499
14
    InstsToRemove.insert(I);
500
14
    I->replaceAllUsesWith(NewVal);
501
14
    LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n");
502
14
  }
503
170
  for (auto *I : NewInsts)
504
14
    Visited->insert(I);
505
170
}
506
507
170
void IRPromoter::ExtendSources() {
508
170
  IRBuilder<> Builder{Ctx};
509
170
510
319
  auto InsertZExt = [&](Value *V, Instruction *InsertPt) {
511
319
    assert(V->getType() != ExtTy && "zext already extends to i32");
512
319
    LLVM_DEBUG(dbgs() << "ARM CGP: Inserting ZExt for " << *V << "\n");
513
319
    Builder.SetInsertPoint(InsertPt);
514
319
    if (auto *I = dyn_cast<Instruction>(V))
515
113
      Builder.SetCurrentDebugLocation(I->getDebugLoc());
516
319
517
319
    Value *ZExt = Builder.CreateZExt(V, ExtTy);
518
319
    if (auto *I = dyn_cast<Instruction>(ZExt)) {
519
319
      if (isa<Argument>(V))
520
206
        I->moveBefore(InsertPt);
521
113
      else
522
113
        I->moveAfter(InsertPt);
523
319
      NewInsts.insert(I);
524
319
    }
525
319
526
319
    ReplaceAllUsersOfWith(V, ZExt);
527
319
  };
528
170
529
170
  // Now, insert extending instructions between the sources and their users.
530
170
  LLVM_DEBUG(dbgs() << "ARM CGP: Promoting sources:\n");
531
319
  for (auto V : *Sources) {
532
319
    LLVM_DEBUG(dbgs() << " - " << *V << "\n");
533
319
    if (auto *I = dyn_cast<Instruction>(V))
534
113
      InsertZExt(I, I);
535
206
    else if (auto *Arg = dyn_cast<Argument>(V)) {
536
206
      BasicBlock &BB = Arg->getParent()->front();
537
206
      InsertZExt(Arg, &*BB.getFirstInsertionPt());
538
206
    } else {
539
0
      llvm_unreachable("unhandled source that needs extending");
540
0
    }
541
319
    Promoted.insert(V);
542
319
  }
543
170
}
544
545
170
void IRPromoter::PromoteTree() {
546
170
  LLVM_DEBUG(dbgs() << "ARM CGP: Mutating the tree..\n");
547
170
548
170
  IRBuilder<> Builder{Ctx};
549
170
550
170
  // Mutate the types of the instructions within the tree. Here we handle
551
170
  // constant operands.
552
1.14k
  for (auto *V : *Visited) {
553
1.14k
    if (Sources->count(V))
554
319
      continue;
555
830
556
830
    auto *I = cast<Instruction>(V);
557
830
    if (Sinks->count(I))
558
108
      continue;
559
722
560
2.18k
    
for (unsigned i = 0, e = I->getNumOperands(); 722
i < e;
++i1.46k
) {
561
1.46k
      Value *Op = I->getOperand(i);
562
1.46k
      if ((Op->getType() == ExtTy) || 
!isa<IntegerType>(Op->getType())667
)
563
811
        continue;
564
655
565
655
      if (auto *Const = dyn_cast<ConstantInt>(Op)) {
566
406
        Constant *NewConst = ConstantExpr::getZExt(Const, ExtTy);
567
406
        I->setOperand(i, NewConst);
568
406
      } else 
if (249
isa<UndefValue>(Op)249
)
569
4
        I->setOperand(i, UndefValue::get(ExtTy));
570
655
    }
571
722
572
722
    if (shouldPromote(I)) {
573
505
      I->mutateType(ExtTy);
574
505
      Promoted.insert(I);
575
505
    }
576
722
  }
577
170
578
170
  // Finally, any instructions that should be promoted but haven't yet been,
579
170
  // need to be handled using intrinsics.
580
1.14k
  for (auto *V : *Visited) {
581
1.14k
    auto *I = dyn_cast<Instruction>(V);
582
1.14k
    if (!I)
583
206
      continue;
584
943
585
943
    if (Sources->count(I) || 
Sinks->count(I)830
)
586
221
      continue;
587
722
588
722
    if (!shouldPromote(I) || 
SafeToPromote->count(I)465
||
NewInsts.count(I)38
)
589
698
      continue;
590
24
591
24
    assert(EnableDSP && "DSP intrinisc insertion not enabled!");
592
24
593
24
    // Replace unsafe instructions with appropriate intrinsic calls.
594
24
    LLVM_DEBUG(dbgs() << "ARM CGP: Inserting DSP intrinsic for "
595
24
               << *I << "\n");
596
24
    Function *DSPInst =
597
24
      Intrinsic::getDeclaration(M, getNarrowIntrinsic(I));
598
24
    Builder.SetInsertPoint(I);
599
24
    Builder.SetCurrentDebugLocation(I->getDebugLoc());
600
24
    Value *Args[] = { I->getOperand(0), I->getOperand(1) };
601
24
    CallInst *Call = Builder.CreateCall(DSPInst, Args);
602
24
    NewInsts.insert(Call);
603
24
    ReplaceAllUsersOfWith(I, Call);
604
24
  }
605
170
}
606
607
170
void IRPromoter::TruncateSinks() {
608
170
  LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the sinks:\n");
609
170
610
170
  IRBuilder<> Builder{Ctx};
611
170
612
170
  auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction* {
613
163
    if (!isa<Instruction>(V) || 
!isa<IntegerType>(V->getType())131
)
614
42
      return nullptr;
615
121
616
121
    if ((!Promoted.count(V) && 
!NewInsts.count(V)35
) ||
Sources->count(V)115
)
617
11
      return nullptr;
618
110
619
110
    LLVM_DEBUG(dbgs() << "ARM CGP: Creating " << *TruncTy << " Trunc for "
620
110
               << *V << "\n");
621
110
    Builder.SetInsertPoint(cast<Instruction>(V));
622
110
    auto *Trunc = dyn_cast<Instruction>(Builder.CreateTrunc(V, TruncTy));
623
110
    if (Trunc)
624
110
      NewInsts.insert(Trunc);
625
110
    return Trunc;
626
110
  };
627
170
628
170
  // Fix up any stores or returns that use the results of the promoted
629
170
  // chain.
630
170
  for (auto I : *Sinks) {
631
122
    LLVM_DEBUG(dbgs() << "ARM CGP: For Sink: " << *I << "\n");
632
122
633
122
    // Handle calls separately as we need to iterate over arg operands.
634
122
    if (auto *Call = dyn_cast<CallInst>(I)) {
635
38
      for (unsigned i = 0; i < Call->getNumArgOperands(); 
++i24
) {
636
24
        Value *Arg = Call->getArgOperand(i);
637
24
        Type *Ty = TruncTysMap[Call][i];
638
24
        if (Instruction *Trunc = InsertTrunc(Arg, Ty)) {
639
8
          Trunc->moveBefore(Call);
640
8
          Call->setArgOperand(i, Trunc);
641
8
        }
642
24
      }
643
14
      continue;
644
14
    }
645
108
646
108
    // Special case switches because we need to truncate the condition.
647
108
    if (auto *Switch = dyn_cast<SwitchInst>(I)) {
648
6
      Type *Ty = TruncTysMap[Switch][0];
649
6
      if (Instruction *Trunc = InsertTrunc(Switch->getCondition(), Ty)) {
650
6
        Trunc->moveBefore(Switch);
651
6
        Switch->setCondition(Trunc);
652
6
      }
653
6
      continue;
654
6
    }
655
102
656
102
    // Now handle the others.
657
235
    
for (unsigned i = 0; 102
i < I->getNumOperands();
++i133
) {
658
133
      Type *Ty = TruncTysMap[I][i];
659
133
      if (Instruction *Trunc = InsertTrunc(I->getOperand(i), Ty)) {
660
96
        Trunc->moveBefore(I);
661
96
        I->setOperand(i, Trunc);
662
96
      }
663
133
    }
664
102
  }
665
170
}
666
667
170
void IRPromoter::Cleanup() {
668
170
  LLVM_DEBUG(dbgs() << "ARM CGP: Cleanup..\n");
669
170
  // Some zexts will now have become redundant, along with their trunc
670
170
  // operands, so remove them
671
1.14k
  for (auto V : *Visited) {
672
1.14k
    if (!isa<ZExtInst>(V))
673
1.08k
      continue;
674
64
675
64
    auto ZExt = cast<ZExtInst>(V);
676
64
    if (ZExt->getDestTy() != ExtTy)
677
4
      continue;
678
60
679
60
    Value *Src = ZExt->getOperand(0);
680
60
    if (ZExt->getSrcTy() == ZExt->getDestTy()) {
681
40
      LLVM_DEBUG(dbgs() << "ARM CGP: Removing unnecessary cast: " << *ZExt
682
40
                 << "\n");
683
40
      ReplaceAllUsersOfWith(ZExt, Src);
684
40
      continue;
685
40
    }
686
20
687
20
    // Unless they produce a value that is narrower than ExtTy, we can
688
20
    // replace the result of the zext with the input of a newly inserted
689
20
    // trunc.
690
20
    if (NewInsts.count(Src) && 
isa<TruncInst>(Src)15
&&
691
20
        
Src->getType() == OrigTy15
) {
692
7
      auto *Trunc = cast<TruncInst>(Src);
693
7
      assert(Trunc->getOperand(0)->getType() == ExtTy &&
694
7
             "expected inserted trunc to be operating on i32");
695
7
      ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0));
696
7
    }
697
20
  }
698
170
699
170
  for (auto *I : InstsToRemove) {
700
123
    LLVM_DEBUG(dbgs() << "ARM CGP: Removing " << *I << "\n");
701
123
    I->dropAllReferences();
702
123
    I->eraseFromParent();
703
123
  }
704
170
705
170
  InstsToRemove.clear();
706
170
  NewInsts.clear();
707
170
  TruncTysMap.clear();
708
170
  Promoted.clear();
709
170
  SafeToPromote->clear();
710
170
  SafeWrap->clear();
711
170
}
712
713
170
void IRPromoter::ConvertTruncs() {
714
170
  LLVM_DEBUG(dbgs() << "ARM CGP: Converting truncs..\n");
715
170
  IRBuilder<> Builder{Ctx};
716
170
717
1.14k
  for (auto *V : *Visited) {
718
1.14k
    if (!isa<TruncInst>(V) || 
Sources->count(V)51
)
719
1.11k
      continue;
720
38
721
38
    auto *Trunc = cast<TruncInst>(V);
722
38
    Builder.SetInsertPoint(Trunc);
723
38
    IntegerType *SrcTy = cast<IntegerType>(Trunc->getOperand(0)->getType());
724
38
    IntegerType *DestTy = cast<IntegerType>(TruncTysMap[Trunc][0]);
725
38
726
38
    unsigned NumBits = DestTy->getScalarSizeInBits();
727
38
    ConstantInt *Mask =
728
38
      ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue());
729
38
    Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask);
730
38
731
38
    if (auto *I = dyn_cast<Instruction>(Masked))
732
38
      NewInsts.insert(I);
733
38
734
38
    ReplaceAllUsersOfWith(Trunc, Masked);
735
38
  }
736
170
}
737
738
void IRPromoter::Mutate(Type *OrigTy,
739
                        SetVector<Value*> &Visited,
740
                        SmallPtrSetImpl<Value*> &Sources,
741
                        SmallPtrSetImpl<Instruction*> &Sinks,
742
                        SmallPtrSetImpl<Instruction*> &SafeToPromote,
743
170
                        SmallPtrSetImpl<Instruction*> &SafeWrap) {
744
170
  LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from "
745
170
             << ARMCodeGenPrepare::TypeSize << " to 32-bits\n");
746
170
747
170
  assert(isa<IntegerType>(OrigTy) && "expected integer type");
748
170
  this->OrigTy = cast<IntegerType>(OrigTy);
749
170
  assert(OrigTy->getPrimitiveSizeInBits() < ExtTy->getPrimitiveSizeInBits() &&
750
170
         "original type not smaller than extended type");
751
170
752
170
  this->Visited = &Visited;
753
170
  this->Sources = &Sources;
754
170
  this->Sinks = &Sinks;
755
170
  this->SafeToPromote = &SafeToPromote;
756
170
  this->SafeWrap = &SafeWrap;
757
170
758
170
  // Cache original types of the values that will likely need truncating
759
170
  for (auto *I : Sinks) {
760
122
    if (auto *Call = dyn_cast<CallInst>(I)) {
761
38
      for (unsigned i = 0; i < Call->getNumArgOperands(); 
++i24
) {
762
24
        Value *Arg = Call->getArgOperand(i);
763
24
        TruncTysMap[Call].push_back(Arg->getType());
764
24
      }
765
108
    } else if (auto *Switch = dyn_cast<SwitchInst>(I))
766
6
      TruncTysMap[I].push_back(Switch->getCondition()->getType());
767
102
    else {
768
235
      for (unsigned i = 0; i < I->getNumOperands(); 
++i133
)
769
133
        TruncTysMap[I].push_back(I->getOperand(i)->getType());
770
102
    }
771
122
  }
772
1.13k
  for (auto *V : Visited) {
773
1.13k
    if (!isa<TruncInst>(V) || 
Sources.count(V)51
)
774
1.09k
      continue;
775
38
    auto *Trunc = cast<TruncInst>(V);
776
38
    TruncTysMap[Trunc].push_back(Trunc->getDestTy());
777
38
  }
778
170
779
170
  // Convert adds using negative immediates to equivalent instructions that use
780
170
  // positive constants.
781
170
  PrepareWrappingAdds();
782
170
783
170
  // Insert zext instructions between sources and their users.
784
170
  ExtendSources();
785
170
786
170
  // Promote visited instructions, mutating their types in place. Also insert
787
170
  // DSP intrinsics, if enabled, for adds and subs which would be unsafe to
788
170
  // promote.
789
170
  PromoteTree();
790
170
791
170
  // Convert any truncs, that aren't sources, into AND masks.
792
170
  ConvertTruncs();
793
170
794
170
  // Insert trunc instructions for use by calls, stores etc...
795
170
  TruncateSinks();
796
170
797
170
  // Finally, remove unecessary zexts and truncs, delete old instructions and
798
170
  // clear the data structures.
799
170
  Cleanup();
800
170
801
170
  LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete\n");
802
170
}
803
804
/// We accept most instructions, as well as Arguments and ConstantInsts. We
805
/// Disallow casts other than zext and truncs and only allow calls if their
806
/// return value is zeroext. We don't allow opcodes that can introduce sign
807
/// bits.
808
2.27k
bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
809
2.27k
  if (auto *I = dyn_cast<ICmpInst>(V)) {
810
335
    // Now that we allow small types than TypeSize, only allow icmp of
811
335
    // TypeSize because they will require a trunc to be legalised.
812
335
    // TODO: Allow icmp of smaller types, and calculate at the end
813
335
    // whether the transform would be beneficial.
814
335
    if (isa<PointerType>(I->getOperand(0)->getType()))
815
2
      return true;
816
333
    return EqualTypeSize(I->getOperand(0));
817
333
  }
818
1.93k
819
1.93k
  if (GenerateSignBits(V)) {
820
37
    LLVM_DEBUG(dbgs() << "ARM CGP: No, instruction can generate sign bits.\n");
821
37
    return false;
822
37
  }
823
1.89k
824
1.89k
  // Memory instructions
825
1.89k
  if (isa<StoreInst>(V) || 
isa<GetElementPtrInst>(V)1.86k
)
826
34
    return true;
827
1.86k
828
1.86k
  // Branches and targets.
829
1.86k
  if( isa<BranchInst>(V) || isa<SwitchInst>(V) || 
isa<BasicBlock>(V)1.85k
)
830
22
    return true;
831
1.84k
832
1.84k
  // Non-instruction values that we can handle.
833
1.84k
  if ((isa<Constant>(V) && 
!isa<ConstantExpr>(V)440
) ||
isa<Argument>(V)1.40k
)
834
720
    return isSupportedType(V);
835
1.12k
836
1.12k
  if (isa<PHINode>(V) || 
isa<SelectInst>(V)1.02k
||
isa<ReturnInst>(V)917
||
837
1.12k
      
isa<LoadInst>(V)870
)
838
408
    return isSupportedType(V);
839
714
840
714
  if (auto *Cast = dyn_cast<CastInst>(V))
841
241
    return isSupportedType(Cast) || 
isSupportedType(Cast->getOperand(0))56
;
842
473
843
473
  // Special cases for calls as we need to check for zeroext
844
473
  // TODO We should accept calls even if they don't have zeroext, as they can
845
473
  // still be sinks.
846
473
  if (auto *Call = dyn_cast<CallInst>(V))
847
24
    return isSupportedType(Call) &&
848
24
           
Call->hasRetAttr(Attribute::AttrKind::ZExt)22
;
849
449
850
449
  if (!isa<BinaryOperator>(V))
851
2
    return false;
852
447
853
447
  if (!isSupportedType(V))
854
0
    return false;
855
447
856
447
  return true;
857
447
}
858
859
/// Check that the type of V would be promoted and that the original type is
860
/// smaller than the targeted promoted type. Check that we're not trying to
861
/// promote something larger than our base 'TypeSize' type.
862
1.27k
bool ARMCodeGenPrepare::isLegalToPromote(Value *V) {
863
1.27k
864
1.27k
  auto *I = dyn_cast<Instruction>(V);
865
1.27k
  if (!I)
866
282
    return true;
867
997
868
997
  if (SafeToPromote.count(I))
869
97
   return true;
870
900
871
900
  if (isPromotedResultSafe(V) || 
isSafeWrap(I)85
) {
872
834
    SafeToPromote.insert(I);
873
834
    return true;
874
834
  }
875
66
876
66
  if (I->getOpcode() != Instruction::Add && 
I->getOpcode() != Instruction::Sub33
)
877
2
    return false;
878
64
879
64
  // If promotion is not safe, can we use a DSP instruction to natively
880
64
  // handle the narrow type?
881
64
  if (!ST->hasDSP() || !EnableDSP || 
!isSupportedType(I)38
)
882
26
    return false;
883
38
884
38
  if (ST->isThumb() && !ST->hasThumb2())
885
0
    return false;
886
38
887
38
  // TODO
888
38
  // Would it be profitable? For Thumb code, these parallel DSP instructions
889
38
  // are only Thumb-2, so we wouldn't be able to dual issue on Cortex-M33. For
890
38
  // Cortex-A, specifically Cortex-A72, the latency is double and throughput is
891
38
  // halved. They also do not take immediates as operands.
892
74
  
for (auto &Op : I->operands())38
{
893
74
    if (isa<Constant>(Op)) {
894
14
      if (!EnableDSPWithImms)
895
7
        return false;
896
14
    }
897
74
  }
898
38
  
LLVM_DEBUG31
(dbgs() << "ARM CGP: Will use an intrinsic for: " << *I << "\n");
899
31
  return true;
900
38
}
901
902
452
bool ARMCodeGenPrepare::TryToPromote(Value *V) {
903
452
  OrigTy = V->getType();
904
452
  TypeSize = OrigTy->getPrimitiveSizeInBits();
905
452
  if (TypeSize > 16 || 
TypeSize < 8333
)
906
133
    return false;
907
319
908
319
  SafeToPromote.clear();
909
319
  SafeWrap.clear();
910
319
911
319
  if (!isSupportedValue(V) || 
!shouldPromote(V)305
||
!isLegalToPromote(V)301
)
912
42
    return false;
913
277
914
277
  LLVM_DEBUG(dbgs() << "ARM CGP: TryToPromote: " << *V << ", TypeSize = "
915
277
             << TypeSize << "\n");
916
277
917
277
  SetVector<Value*> WorkList;
918
277
  SmallPtrSet<Value*, 8> Sources;
919
277
  SmallPtrSet<Instruction*, 4> Sinks;
920
277
  SetVector<Value*> CurrentVisited;
921
277
  WorkList.insert(V);
922
277
923
277
  // Return true if V was added to the worklist as a supported instruction,
924
277
  // if it was already visited, or if we don't need to explore it (e.g.
925
277
  // pointer values and GEPs), and false otherwise.
926
3.16k
  auto AddLegalInst = [&](Value *V) {
927
3.16k
    if (CurrentVisited.count(V))
928
1.21k
      return true;
929
1.95k
930
1.95k
    // Ignore GEPs because they don't need promoting and the constant indices
931
1.95k
    // will prevent the transformation.
932
1.95k
    if (isa<GetElementPtrInst>(V))
933
4
      return true;
934
1.95k
935
1.95k
    if (!isSupportedValue(V) || 
(1.89k
shouldPromote(V)1.89k
&&
!isLegalToPromote(V)978
)) {
936
71
      LLVM_DEBUG(dbgs() << "ARM CGP: Can't handle: " << *V << "\n");
937
71
      return false;
938
71
    }
939
1.88k
940
1.88k
    WorkList.insert(V);
941
1.88k
    return true;
942
1.88k
  };
943
277
944
277
  // Iterate through, and add to, a tree of operands and users in the use-def.
945
2.09k
  while (!WorkList.empty()) {
946
1.92k
    Value *V = WorkList.back();
947
1.92k
    WorkList.pop_back();
948
1.92k
    if (CurrentVisited.count(V))
949
0
      continue;
950
1.92k
951
1.92k
    // Ignore non-instructions, other than arguments.
952
1.92k
    if (!isa<Instruction>(V) && 
!isSource(V)676
)
953
430
      continue;
954
1.49k
955
1.49k
    // If we've already visited this value from somewhere, bail now because
956
1.49k
    // the tree has already been explored.
957
1.49k
    // TODO: This could limit the transform, ie if we try to promote something
958
1.49k
    // from an i8 and fail first, before trying an i16.
959
1.49k
    if (AllVisited.count(V))
960
31
      return false;
961
1.46k
962
1.46k
    CurrentVisited.insert(V);
963
1.46k
    AllVisited.insert(V);
964
1.46k
965
1.46k
    // Calls can be both sources and sinks.
966
1.46k
    if (isSink(V))
967
151
      Sinks.insert(cast<Instruction>(V));
968
1.46k
969
1.46k
    if (isSource(V))
970
396
      Sources.insert(V);
971
1.46k
972
1.46k
    if (!isSink(V) && 
!isSource(V)1.31k
) {
973
928
      if (auto *I = dyn_cast<Instruction>(V)) {
974
928
        // Visit operands of any instruction visited.
975
1.80k
        for (auto &U : I->operands()) {
976
1.80k
          if (!AddLegalInst(U))
977
33
            return false;
978
1.80k
        }
979
928
      }
980
928
    }
981
1.46k
982
1.46k
    // Don't visit users of a node which isn't going to be mutated unless its a
983
1.46k
    // source.
984
1.46k
    
if (1.42k
isSource(V)1.42k
||
shouldPromote(V)1.03k
) {
985
1.36k
      for (Use &U : V->uses()) {
986
1.36k
        if (!AddLegalInst(U.getUser()))
987
38
          return false;
988
1.36k
      }
989
1.00k
    }
990
1.42k
  }
991
277
992
277
  
LLVM_DEBUG175
(dbgs() << "ARM CGP: Visited nodes:\n";
993
175
             for (auto *I : CurrentVisited)
994
175
               I->dump();
995
175
             );
996
175
  unsigned ToPromote = 0;
997
1.15k
  for (auto *V : CurrentVisited) {
998
1.15k
    if (Sources.count(V))
999
324
      continue;
1000
826
    if (Sinks.count(cast<Instruction>(V)))
1001
113
      continue;
1002
713
    ++ToPromote;
1003
713
  }
1004
175
1005
175
  if (ToPromote < 2)
1006
5
    return false;
1007
170
1008
170
  Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote,
1009
170
                   SafeWrap);
1010
170
  return true;
1011
170
}
1012
1013
4.90k
bool ARMCodeGenPrepare::doInitialization(Module &M) {
1014
4.90k
  Promoter = new IRPromoter(&M);
1015
4.90k
  return false;
1016
4.90k
}
1017
1018
25.3k
bool ARMCodeGenPrepare::runOnFunction(Function &F) {
1019
25.3k
  if (skipFunction(F) || 
DisableCGP25.3k
)
1020
25.0k
    return false;
1021
310
1022
310
  auto *TPC = &getAnalysis<TargetPassConfig>();
1023
310
  if (!TPC)
1024
0
    return false;
1025
310
1026
310
  const TargetMachine &TM = TPC->getTM<TargetMachine>();
1027
310
  ST = &TM.getSubtarget<ARMSubtarget>(F);
1028
310
  bool MadeChange = false;
1029
310
  LLVM_DEBUG(dbgs() << "ARM CGP: Running on " << F.getName() << "\n");
1030
310
1031
310
  // Search up from icmps to try to promote their operands.
1032
621
  for (BasicBlock &BB : F) {
1033
621
    auto &Insts = BB.getInstList();
1034
2.75k
    for (auto &I : Insts) {
1035
2.75k
      if (AllVisited.count(&I))
1036
396
        continue;
1037
2.35k
1038
2.35k
      if (isa<ICmpInst>(I)) {
1039
392
        auto &CI = cast<ICmpInst>(I);
1040
392
1041
392
        // Skip signed or pointer compares
1042
392
        if (CI.isSigned() || 
!isa<IntegerType>(CI.getOperand(0)->getType())371
)
1043
27
          continue;
1044
365
1045
365
        LLVM_DEBUG(dbgs() << "ARM CGP: Searching from: " << CI << "\n");
1046
365
1047
730
        for (auto &Op : CI.operands()) {
1048
730
          if (auto *I = dyn_cast<Instruction>(Op))
1049
452
            MadeChange |= TryToPromote(I);
1050
730
        }
1051
365
      }
1052
2.35k
    }
1053
621
    LLVM_DEBUG(if (verifyFunction(F, &dbgs())) {
1054
621
                dbgs() << F;
1055
621
                report_fatal_error("Broken function after type promotion");
1056
621
               });
1057
621
  }
1058
310
  if (MadeChange)
1059
310
    LLVM_DEBUG(dbgs() << "After ARMCodeGenPrepare: " << F << "\n");
1060
310
1061
310
  return MadeChange;
1062
310
}
1063
1064
4.88k
bool ARMCodeGenPrepare::doFinalization(Module &M) {
1065
4.88k
  delete Promoter;
1066
4.88k
  return false;
1067
4.88k
}
1068
1069
101k
INITIALIZE_PASS_BEGIN(ARMCodeGenPrepare, DEBUG_TYPE,
1070
101k
                      "ARM IR optimizations", false, false)
1071
101k
INITIALIZE_PASS_END(ARMCodeGenPrepare, DEBUG_TYPE, "ARM IR optimizations",
1072
                    false, false)
1073
1074
char ARMCodeGenPrepare::ID = 0;
1075
unsigned ARMCodeGenPrepare::TypeSize = 0;
1076
1077
4.91k
FunctionPass *llvm::createARMCodeGenPreparePass() {
1078
4.91k
  return new ARMCodeGenPrepare();
1079
4.91k
}