//===-- ConstantFolding.cpp - Fold instructions into constants ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines routines for folding instructions into constants.
//
// Also, to supplement the basic IR ConstantExpr simplifications,
// this file defines some additional folding routines that can make use of
// DataLayout information. These functions cannot go in IR due to library
// dependency issues.
//
//===----------------------------------------------------------------------===//

18#include "llvm/Analysis/ConstantFolding.h"19#include "llvm/ADT/APFloat.h"20#include "llvm/ADT/APInt.h"21#include "llvm/ADT/APSInt.h"22#include "llvm/ADT/ArrayRef.h"23#include "llvm/ADT/DenseMap.h"24#include "llvm/ADT/STLExtras.h"25#include "llvm/ADT/SmallVector.h"26#include "llvm/ADT/StringRef.h"27#include "llvm/Analysis/TargetFolder.h"28#include "llvm/Analysis/TargetLibraryInfo.h"29#include "llvm/Analysis/ValueTracking.h"30#include "llvm/Analysis/VectorUtils.h"31#include "llvm/Config/config.h"32#include "llvm/IR/Constant.h"33#include "llvm/IR/ConstantFold.h"34#include "llvm/IR/Constants.h"35#include "llvm/IR/DataLayout.h"36#include "llvm/IR/DerivedTypes.h"37#include "llvm/IR/Function.h"38#include "llvm/IR/GlobalValue.h"39#include "llvm/IR/GlobalVariable.h"40#include "llvm/IR/InstrTypes.h"41#include "llvm/IR/Instruction.h"42#include "llvm/IR/Instructions.h"43#include "llvm/IR/IntrinsicInst.h"44#include "llvm/IR/Intrinsics.h"45#include "llvm/IR/IntrinsicsAArch64.h"46#include "llvm/IR/IntrinsicsAMDGPU.h"47#include "llvm/IR/IntrinsicsARM.h"48#include "llvm/IR/IntrinsicsWebAssembly.h"49#include "llvm/IR/IntrinsicsX86.h"50#include "llvm/IR/Operator.h"51#include "llvm/IR/Type.h"52#include "llvm/IR/Value.h"53#include "llvm/Support/Casting.h"54#include "llvm/Support/ErrorHandling.h"55#include "llvm/Support/KnownBits.h"56#include "llvm/Support/MathExtras.h"57#include <cassert>58#include <cerrno>59#include <cfenv>60#include <cmath>61#include <cstdint>62
63using namespace llvm;64
65namespace {66
67//===----------------------------------------------------------------------===//
68// Constant Folding internal helper functions
69//===----------------------------------------------------------------------===//
70
static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy,
                                        Constant *C, Type *SrcEltTy,
                                        unsigned NumSrcElts,
                                        const DataLayout &DL) {
  // Now that we know that the input value is a vector of integers, just shift
  // and insert them into our result.
  unsigned BitShift = DL.getTypeSizeInBits(SrcEltTy);
  for (unsigned i = 0; i != NumSrcElts; ++i) {
    Constant *Element;
    if (DL.isLittleEndian())
      Element = C->getAggregateElement(NumSrcElts - i - 1);
    else
      Element = C->getAggregateElement(i);

    if (Element && isa<UndefValue>(Element)) {
      Result <<= BitShift;
      continue;
    }

    auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
    if (!ElementCI)
      return ConstantExpr::getBitCast(C, DestTy);

    Result <<= BitShift;
    Result |= ElementCI->getValue().zext(Result.getBitWidth());
  }

  return nullptr;
}

/// Constant fold bitcast, symbolically evaluating it with DataLayout.
/// This always returns a non-null constant, but it may be a
/// ConstantExpr if unfoldable.
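/// For example, on a little-endian target a bitcast of <2 x i32> <i32 1, i32 2>
/// to i64 should fold to i64 0x0000000200000001 (element 0 ends up in the low
/// bits).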
Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
  assert(CastInst::castIsValid(Instruction::BitCast, C, DestTy) &&
         "Invalid constantexpr bitcast!");

  // Catch the obvious splat cases.
  if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy, DL))
    return Res;

  if (auto *VTy = dyn_cast<VectorType>(C->getType())) {
    // Handle a vector->scalar integer/fp cast.
    if (isa<IntegerType>(DestTy) || DestTy->isFloatingPointTy()) {
      unsigned NumSrcElts = cast<FixedVectorType>(VTy)->getNumElements();
      Type *SrcEltTy = VTy->getElementType();

      // If the vector is a vector of floating point, convert it to vector of int
      // to simplify things.
      if (SrcEltTy->isFloatingPointTy()) {
        unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
        auto *SrcIVTy = FixedVectorType::get(
            IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
        // Ask IR to do the conversion now that #elts line up.
        C = ConstantExpr::getBitCast(C, SrcIVTy);
      }

      APInt Result(DL.getTypeSizeInBits(DestTy), 0);
      if (Constant *CE = foldConstVectorToAPInt(Result, DestTy, C,
                                                SrcEltTy, NumSrcElts, DL))
        return CE;

      if (isa<IntegerType>(DestTy))
        return ConstantInt::get(DestTy, Result);

      APFloat FP(DestTy->getFltSemantics(), Result);
      return ConstantFP::get(DestTy->getContext(), FP);
    }
  }

  // The code below only handles casts to vectors currently.
  auto *DestVTy = dyn_cast<VectorType>(DestTy);
  if (!DestVTy)
    return ConstantExpr::getBitCast(C, DestTy);

  // If this is a scalar -> vector cast, convert the input into a <1 x scalar>
  // vector so the code below can handle it uniformly.
  if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
    Constant *Ops = C; // don't take the address of C!
    return FoldBitCast(ConstantVector::get(Ops), DestTy, DL);
  }

  // If this is a bitcast from constant vector -> vector, fold it.
  if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C))
    return ConstantExpr::getBitCast(C, DestTy);

  // If the element types match, IR can fold it.
  unsigned NumDstElt = cast<FixedVectorType>(DestVTy)->getNumElements();
  unsigned NumSrcElt = cast<FixedVectorType>(C->getType())->getNumElements();
  if (NumDstElt == NumSrcElt)
    return ConstantExpr::getBitCast(C, DestTy);

  Type *SrcEltTy = cast<VectorType>(C->getType())->getElementType();
  Type *DstEltTy = DestVTy->getElementType();

  // Otherwise, we're changing the number of elements in a vector, which
  // requires endianness information to do the right thing. For example,
  //    bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
  // folds to (little endian):
  //    <4 x i32> <i32 0, i32 0, i32 1, i32 0>
  // and to (big endian):
  //    <4 x i32> <i32 0, i32 0, i32 0, i32 1>

  // First things first. We only want to think about integers here, so if
  // we have something in FP form, recast it as integer.
  if (DstEltTy->isFloatingPointTy()) {
    // Fold to a vector of integers with the same size as our FP type.
    unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
    auto *DestIVTy = FixedVectorType::get(
        IntegerType::get(C->getContext(), FPWidth), NumDstElt);
    // Recursively handle this integer conversion, if possible.
    C = FoldBitCast(C, DestIVTy, DL);

    // Finally, IR can handle this now that #elts line up.
    return ConstantExpr::getBitCast(C, DestTy);
  }

  // Okay, we know the destination is integer; if the input is FP, convert
  // it to integer first.
  if (SrcEltTy->isFloatingPointTy()) {
    unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
    auto *SrcIVTy = FixedVectorType::get(
        IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
    // Ask IR to do the conversion now that #elts line up.
    C = ConstantExpr::getBitCast(C, SrcIVTy);
    // If IR wasn't able to fold it, bail out.
    if (!isa<ConstantVector>(C) && // FIXME: Remove ConstantVector.
        !isa<ConstantDataVector>(C))
      return C;
  }

  // Now we know that the input and output vectors are both integer vectors
  // of the same size, and that their #elements is not the same. Do the
  // conversion here, which depends on whether the input or output has
  // more elements.
  bool isLittleEndian = DL.isLittleEndian();

  SmallVector<Constant*, 32> Result;
  if (NumDstElt < NumSrcElt) {
    // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
    Constant *Zero = Constant::getNullValue(DstEltTy);
    unsigned Ratio = NumSrcElt/NumDstElt;
    unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
    unsigned SrcElt = 0;
    for (unsigned i = 0; i != NumDstElt; ++i) {
      // Build each element of the result.
      Constant *Elt = Zero;
      unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
      for (unsigned j = 0; j != Ratio; ++j) {
        Constant *Src = C->getAggregateElement(SrcElt++);
        if (Src && isa<UndefValue>(Src))
          Src = Constant::getNullValue(
              cast<VectorType>(C->getType())->getElementType());
        else
          Src = dyn_cast_or_null<ConstantInt>(Src);
        if (!Src) // Reject constantexpr elements.
          return ConstantExpr::getBitCast(C, DestTy);

        // Zero extend the element to the right size.
        Src = ConstantFoldCastOperand(Instruction::ZExt, Src, Elt->getType(),
                                      DL);
        assert(Src && "Constant folding cannot fail on plain integers");

        // Shift it to the right place, depending on endianness.
        Src = ConstantFoldBinaryOpOperands(
            Instruction::Shl, Src, ConstantInt::get(Src->getType(), ShiftAmt),
            DL);
        assert(Src && "Constant folding cannot fail on plain integers");

        ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;

        // Mix it in.
        Elt = ConstantFoldBinaryOpOperands(Instruction::Or, Elt, Src, DL);
        assert(Elt && "Constant folding cannot fail on plain integers");
      }
      Result.push_back(Elt);
    }
    return ConstantVector::get(Result);
  }

  // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
  unsigned Ratio = NumDstElt/NumSrcElt;
  unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);

  // Loop over each source value, expanding into multiple results.
  for (unsigned i = 0; i != NumSrcElt; ++i) {
    auto *Element = C->getAggregateElement(i);

    if (!Element) // Reject constantexpr elements.
      return ConstantExpr::getBitCast(C, DestTy);

    if (isa<UndefValue>(Element)) {
      // Correctly propagate undef values.
      Result.append(Ratio, UndefValue::get(DstEltTy));
      continue;
    }

    auto *Src = dyn_cast<ConstantInt>(Element);
    if (!Src)
      return ConstantExpr::getBitCast(C, DestTy);

    unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
    for (unsigned j = 0; j != Ratio; ++j) {
      // Shift the piece of the value into the right place, depending on
      // endianness.
      APInt Elt = Src->getValue().lshr(ShiftAmt);
      ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;

      // Truncate and remember this piece.
      Result.push_back(ConstantInt::get(DstEltTy, Elt.trunc(DstBitSize)));
    }
  }

  return ConstantVector::get(Result);
}

} // end anonymous namespace

/// If this constant is a constant offset from a global, return the global and
/// the constant. Because of constantexprs, this function is recursive.
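/// For example, the constant expression
/// 'getelementptr ([5 x i32], ptr @a, i32 0, i32 3)' should set GV to @a and
/// Offset to 12 (three 4-byte elements).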
bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
                                      APInt &Offset, const DataLayout &DL,
                                      DSOLocalEquivalent **DSOEquiv) {
  if (DSOEquiv)
    *DSOEquiv = nullptr;

  // Trivial case, constant is the global.
  if ((GV = dyn_cast<GlobalValue>(C))) {
    unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
    Offset = APInt(BitWidth, 0);
    return true;
  }

  if (auto *FoundDSOEquiv = dyn_cast<DSOLocalEquivalent>(C)) {
    if (DSOEquiv)
      *DSOEquiv = FoundDSOEquiv;
    GV = FoundDSOEquiv->getGlobalValue();
    unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
    Offset = APInt(BitWidth, 0);
    return true;
  }

  // Otherwise, if this isn't a constant expr, bail out.
  auto *CE = dyn_cast<ConstantExpr>(C);
  if (!CE) return false;

  // Look through ptr->int and ptr->ptr casts.
  if (CE->getOpcode() == Instruction::PtrToInt ||
      CE->getOpcode() == Instruction::BitCast)
    return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL,
                                      DSOEquiv);

  // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
  auto *GEP = dyn_cast<GEPOperator>(CE);
  if (!GEP)
    return false;

  unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
  APInt TmpOffset(BitWidth, 0);

  // If the base isn't a global+constant, we aren't either.
  if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL,
                                  DSOEquiv))
    return false;

  // Otherwise, add any offset that our operands provide.
  if (!GEP->accumulateConstantOffset(DL, TmpOffset))
    return false;

  Offset = TmpOffset;
  return true;
}

Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
                                               const DataLayout &DL) {
  do {
    Type *SrcTy = C->getType();
    if (SrcTy == DestTy)
      return C;

    TypeSize DestSize = DL.getTypeSizeInBits(DestTy);
    TypeSize SrcSize = DL.getTypeSizeInBits(SrcTy);
    if (!TypeSize::isKnownGE(SrcSize, DestSize))
      return nullptr;

    // Catch the obvious splat cases (since all-zeros can coerce non-integral
    // pointers legally).
    if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy, DL))
      return Res;

    // If the type sizes are the same and a cast is legal, just directly
    // cast the constant.
    // But be careful not to coerce non-integral pointers illegally.
    if (SrcSize == DestSize &&
        DL.isNonIntegralPointerType(SrcTy->getScalarType()) ==
            DL.isNonIntegralPointerType(DestTy->getScalarType())) {
      Instruction::CastOps Cast = Instruction::BitCast;
      // If we are going from a pointer to int or vice versa, we spell the cast
      // differently.
      if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
        Cast = Instruction::IntToPtr;
      else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
        Cast = Instruction::PtrToInt;

      if (CastInst::castIsValid(Cast, C, DestTy))
        return ConstantFoldCastOperand(Cast, C, DestTy, DL);
    }

    // If this isn't an aggregate type, there is nothing we can do to drill down
    // and find a bitcastable constant.
    if (!SrcTy->isAggregateType() && !SrcTy->isVectorTy())
      return nullptr;

    // We're simulating a load through a pointer that was bitcast to point to
    // a different type, so we can try to walk down through the initial
    // elements of an aggregate to see if some part of the aggregate is
    // castable to implement the "load" semantic model.
    if (SrcTy->isStructTy()) {
      // Struct types might have leading zero-length elements like [0 x i32],
      // which are certainly not what we are looking for, so skip them.
      unsigned Elem = 0;
      Constant *ElemC;
      do {
        ElemC = C->getAggregateElement(Elem++);
      } while (ElemC && DL.getTypeSizeInBits(ElemC->getType()).isZero());
      C = ElemC;
    } else {
      // For non-byte-sized vector elements, the first element is not
      // necessarily located at the vector base address.
      if (auto *VT = dyn_cast<VectorType>(SrcTy))
        if (!DL.typeSizeEqualsStoreSize(VT->getElementType()))
          return nullptr;

      C = C->getAggregateElement(0u);
    }
  } while (C);

  return nullptr;
}

namespace {

/// Recursive helper to read bits out of global. C is the constant being copied
/// out of. ByteOffset is an offset into C. CurPtr is the pointer to copy
/// results into and BytesLeft is the number of bytes left in
/// the CurPtr buffer. DL is the DataLayout.
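/// For example, reading an i32 with value 0x01020304 on a little-endian target
/// should fill CurPtr with the bytes 0x04, 0x03, 0x02, 0x01.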
bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
                        unsigned BytesLeft, const DataLayout &DL) {
  assert(ByteOffset <= DL.getTypeAllocSize(C->getType()) &&
         "Out of range access");

  // If this element is zero or undefined, we can just return since *CurPtr is
  // zero initialized.
  if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
    return true;

  if (auto *CI = dyn_cast<ConstantInt>(C)) {
    if ((CI->getBitWidth() & 7) != 0)
      return false;
    const APInt &Val = CI->getValue();
    unsigned IntBytes = unsigned(CI->getBitWidth()/8);

    for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
      unsigned n = ByteOffset;
      if (!DL.isLittleEndian())
        n = IntBytes - n - 1;
      CurPtr[i] = Val.extractBits(8, n * 8).getZExtValue();
      ++ByteOffset;
    }
    return true;
  }

  if (auto *CFP = dyn_cast<ConstantFP>(C)) {
    if (CFP->getType()->isDoubleTy()) {
      C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), DL);
      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
    }
    if (CFP->getType()->isFloatTy()) {
      C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), DL);
      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
    }
    if (CFP->getType()->isHalfTy()) {
      C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), DL);
      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
    }
    return false;
  }

  if (auto *CS = dyn_cast<ConstantStruct>(C)) {
    const StructLayout *SL = DL.getStructLayout(CS->getType());
    unsigned Index = SL->getElementContainingOffset(ByteOffset);
    uint64_t CurEltOffset = SL->getElementOffset(Index);
    ByteOffset -= CurEltOffset;

    while (true) {
      // If the element access is to the element itself and not to tail padding,
      // read the bytes from the element.
      uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType());

      if (ByteOffset < EltSize &&
          !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr,
                              BytesLeft, DL))
        return false;

      ++Index;

      // Check to see if we read from the last struct element, if so we're done.
      if (Index == CS->getType()->getNumElements())
        return true;

      // If we read all of the bytes we needed from this element we're done.
      uint64_t NextEltOffset = SL->getElementOffset(Index);

      if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset)
        return true;

      // Move to the next element of the struct.
      CurPtr += NextEltOffset - CurEltOffset - ByteOffset;
      BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset;
      ByteOffset = 0;
      CurEltOffset = NextEltOffset;
    }
    // not reached.
  }

  if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
      isa<ConstantDataSequential>(C)) {
    uint64_t NumElts, EltSize;
    Type *EltTy;
    if (auto *AT = dyn_cast<ArrayType>(C->getType())) {
      NumElts = AT->getNumElements();
      EltTy = AT->getElementType();
      EltSize = DL.getTypeAllocSize(EltTy);
    } else {
      NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
      EltTy = cast<FixedVectorType>(C->getType())->getElementType();
      // TODO: For non-byte-sized vectors, current implementation assumes there
      // is padding to the next byte boundary between elements.
      if (!DL.typeSizeEqualsStoreSize(EltTy))
        return false;

      EltSize = DL.getTypeStoreSize(EltTy);
    }
    uint64_t Index = ByteOffset / EltSize;
    uint64_t Offset = ByteOffset - Index * EltSize;

    for (; Index != NumElts; ++Index) {
      if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
                              BytesLeft, DL))
        return false;

      uint64_t BytesWritten = EltSize - Offset;
      assert(BytesWritten <= EltSize && "Not indexing into this element?");
      if (BytesWritten >= BytesLeft)
        return true;

      Offset = 0;
      BytesLeft -= BytesWritten;
      CurPtr += BytesWritten;
    }
    return true;
  }

  if (auto *CE = dyn_cast<ConstantExpr>(C)) {
    if (CE->getOpcode() == Instruction::IntToPtr &&
        CE->getOperand(0)->getType() == DL.getIntPtrType(CE->getType())) {
      return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
                                BytesLeft, DL);
    }
  }

  // Otherwise, unknown initializer type.
  return false;
}

Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy,
                                       int64_t Offset, const DataLayout &DL) {
  // Bail out early. We do not expect to load from a scalable global variable.
  if (isa<ScalableVectorType>(LoadTy))
    return nullptr;

  auto *IntType = dyn_cast<IntegerType>(LoadTy);

  // If this isn't an integer load we can't fold it directly.
  if (!IntType) {
    // If this is a non-integer load, we can try folding it as an int load and
    // then bitcast the result. This can be useful for union cases. Note
    // that address spaces don't matter here since we're not going to result in
    // an actual new load.
    if (!LoadTy->isFloatingPointTy() && !LoadTy->isPointerTy() &&
        !LoadTy->isVectorTy())
      return nullptr;

    Type *MapTy = Type::getIntNTy(C->getContext(),
                                  DL.getTypeSizeInBits(LoadTy).getFixedValue());
    if (Constant *Res = FoldReinterpretLoadFromConst(C, MapTy, Offset, DL)) {
      if (Res->isNullValue() && !LoadTy->isX86_MMXTy() &&
          !LoadTy->isX86_AMXTy())
        // Materializing a zero can be done trivially without a bitcast.
        return Constant::getNullValue(LoadTy);
      Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(LoadTy) : LoadTy;
      Res = FoldBitCast(Res, CastTy, DL);
      if (LoadTy->isPtrOrPtrVectorTy()) {
        // For a vector of pointers, we need to first convert to a vector of
        // integers, then do a vector inttoptr.
        if (Res->isNullValue() && !LoadTy->isX86_MMXTy() &&
            !LoadTy->isX86_AMXTy())
          return Constant::getNullValue(LoadTy);
        if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))
          // Be careful not to replace a load of an addrspace value with an
          // inttoptr here.
          return nullptr;
        Res = ConstantExpr::getIntToPtr(Res, LoadTy);
      }
      return Res;
    }
    return nullptr;
  }

  unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
  if (BytesLoaded > 32 || BytesLoaded == 0)
    return nullptr;

  // If we're not accessing anything in this constant, the result is undefined.
  if (Offset <= -1 * static_cast<int64_t>(BytesLoaded))
    return PoisonValue::get(IntType);

  // TODO: We should be able to support scalable types.
  TypeSize InitializerSize = DL.getTypeAllocSize(C->getType());
  if (InitializerSize.isScalable())
    return nullptr;

  // If we're not accessing anything in this constant, the result is undefined.
  if (Offset >= (int64_t)InitializerSize.getFixedValue())
    return PoisonValue::get(IntType);

  unsigned char RawBytes[32] = {0};
  unsigned char *CurPtr = RawBytes;
  unsigned BytesLeft = BytesLoaded;

  // If we're loading off the beginning of the global, some bytes may be valid.
  if (Offset < 0) {
    CurPtr += -Offset;
    BytesLeft += Offset;
    Offset = 0;
  }

  if (!ReadDataFromGlobal(C, Offset, CurPtr, BytesLeft, DL))
    return nullptr;

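  // Reassemble the loaded bytes into an integer, honoring the target's
  // endianness. E.g. with RawBytes = {0x01, 0x02, 0x03, 0x04}, a little-endian
  // i32 load should produce 0x04030201 and a big-endian load 0x01020304.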
  APInt ResultVal = APInt(IntType->getBitWidth(), 0);
  if (DL.isLittleEndian()) {
    ResultVal = RawBytes[BytesLoaded - 1];
    for (unsigned i = 1; i != BytesLoaded; ++i) {
      ResultVal <<= 8;
      ResultVal |= RawBytes[BytesLoaded - 1 - i];
    }
  } else {
    ResultVal = RawBytes[0];
    for (unsigned i = 1; i != BytesLoaded; ++i) {
      ResultVal <<= 8;
      ResultVal |= RawBytes[i];
    }
  }

  return ConstantInt::get(IntType->getContext(), ResultVal);
}

} // anonymous namespace

// If GV is a constant with an initializer read its representation starting
// at Offset and return it as a constant array of unsigned char. Otherwise
// return null.
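// For example, for @g = constant [4 x i8] c"\01\02\03\04" and Offset = 1, this
// should return the constant byte array {0x02, 0x03, 0x04}.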
Constant *llvm::ReadByteArrayFromGlobal(const GlobalVariable *GV,
                                        uint64_t Offset) {
  if (!GV->isConstant() || !GV->hasDefinitiveInitializer())
    return nullptr;

  const DataLayout &DL = GV->getDataLayout();
  Constant *Init = const_cast<Constant *>(GV->getInitializer());
  TypeSize InitSize = DL.getTypeAllocSize(Init->getType());
  if (InitSize < Offset)
    return nullptr;

  uint64_t NBytes = InitSize - Offset;
  if (NBytes > UINT16_MAX)
    // Bail for large initializers in excess of 64K to avoid allocating
    // too much memory.
    // Offset is assumed to be less than or equal to InitSize (this
    // is enforced in ReadDataFromGlobal).
    return nullptr;

  SmallVector<unsigned char, 256> RawBytes(static_cast<size_t>(NBytes));
  unsigned char *CurPtr = RawBytes.data();

  if (!ReadDataFromGlobal(Init, Offset, CurPtr, NBytes, DL))
    return nullptr;

  return ConstantDataArray::get(GV->getContext(), RawBytes);
}

/// If this Offset points exactly to the start of an aggregate element, return
/// that element, otherwise return nullptr.
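/// For example, for a [4 x i32] aggregate an Offset of 8 should return the
/// element at index 2, while an Offset of 6 (which points into the middle of
/// an element) returns nullptr.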
Constant *getConstantAtOffset(Constant *Base, APInt Offset,
                              const DataLayout &DL) {
  if (Offset.isZero())
    return Base;

  if (!isa<ConstantAggregate>(Base) && !isa<ConstantDataSequential>(Base))
    return nullptr;

  Type *ElemTy = Base->getType();
  SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset);
  if (!Offset.isZero() || !Indices[0].isZero())
    return nullptr;

  Constant *C = Base;
  for (const APInt &Index : drop_begin(Indices)) {
    if (Index.isNegative() || Index.getActiveBits() >= 32)
      return nullptr;

    C = C->getAggregateElement(Index.getZExtValue());
    if (!C)
      return nullptr;
  }

  return C;
}

Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
                                          const APInt &Offset,
                                          const DataLayout &DL) {
  if (Constant *AtOffset = getConstantAtOffset(C, Offset, DL))
    if (Constant *Result = ConstantFoldLoadThroughBitcast(AtOffset, Ty, DL))
      return Result;

  // Explicitly check for out-of-bounds access, so we return poison even if the
  // constant is a uniform value.
  TypeSize Size = DL.getTypeAllocSize(C->getType());
  if (!Size.isScalable() && Offset.sge(Size.getFixedValue()))
    return PoisonValue::get(Ty);

  // Try an offset-independent fold of a uniform value.
  if (Constant *Result = ConstantFoldLoadFromUniformValue(C, Ty, DL))
    return Result;

  // Try hard to fold loads from bitcasted strange and non-type-safe things.
  if (Offset.getSignificantBits() <= 64)
    if (Constant *Result =
            FoldReinterpretLoadFromConst(C, Ty, Offset.getSExtValue(), DL))
      return Result;

  return nullptr;
}

Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
                                          const DataLayout &DL) {
  return ConstantFoldLoadFromConst(C, Ty, APInt(64, 0), DL);
}

Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
                                             APInt Offset,
                                             const DataLayout &DL) {
  // We can only fold loads from constant globals with a definitive initializer.
  // Check this upfront, to skip expensive offset calculations.
  auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(C));
  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
    return nullptr;

  C = cast<Constant>(C->stripAndAccumulateConstantOffsets(
      DL, Offset, /* AllowNonInbounds */ true));

  if (C == GV)
    if (Constant *Result = ConstantFoldLoadFromConst(GV->getInitializer(), Ty,
                                                     Offset, DL))
      return Result;

  // If this load comes from anywhere in a uniform constant global, the value
  // is always the same, regardless of the loaded offset.
  return ConstantFoldLoadFromUniformValue(GV->getInitializer(), Ty, DL);
}

Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
                                             const DataLayout &DL) {
  APInt Offset(DL.getIndexTypeSizeInBits(C->getType()), 0);
  return ConstantFoldLoadFromConstPtr(C, Ty, std::move(Offset), DL);
}

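// A load folds to a "uniform" value when the initializer's in-memory
// representation is a single repeated byte pattern (all zero bits, or all one
// bits for an integer/FP result type), so the loaded value is the same at any
// offset. For example, loading an i16 from a zeroinitializer [8 x i32] global
// should fold to i16 0.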
Constant *llvm::ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty,
                                                 const DataLayout &DL) {
  if (isa<PoisonValue>(C))
    return PoisonValue::get(Ty);
  if (isa<UndefValue>(C))
    return UndefValue::get(Ty);
  // If padding is needed when storing C to memory, then it isn't considered
  // uniform.
  if (!DL.typeSizeEqualsStoreSize(C->getType()))
    return nullptr;
  if (C->isNullValue() && !Ty->isX86_MMXTy() && !Ty->isX86_AMXTy())
    return Constant::getNullValue(Ty);
  if (C->isAllOnesValue() &&
      (Ty->isIntOrIntVectorTy() || Ty->isFPOrFPVectorTy()))
    return Constant::getAllOnesValue(Ty);
  return nullptr;
}

namespace {

/// One of Op0/Op1 is a constant expression.
/// Attempt to symbolically evaluate the result of a binary operator merging
/// these together. If target data info is available, it is provided as DL,
/// otherwise DL is null.
Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
                                    const DataLayout &DL) {
  // SROA

  // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
  // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
  // bits.

  if (Opc == Instruction::And) {
    KnownBits Known0 = computeKnownBits(Op0, DL);
    KnownBits Known1 = computeKnownBits(Op1, DL);
    if ((Known1.One | Known0.Zero).isAllOnes()) {
      // All the bits of Op0 that the 'and' could be masking are already zero.
      return Op0;
    }
    if ((Known0.One | Known1.Zero).isAllOnes()) {
      // All the bits of Op1 that the 'and' could be masking are already zero.
      return Op1;
    }

    Known0 &= Known1;
    if (Known0.isConstant())
      return ConstantInt::get(Op0->getType(), Known0.getConstant());
  }

  // If the constant expr is something like &A[123] - &A[4].f, fold this into a
  // constant. This happens frequently when iterating over a global array.
  if (Opc == Instruction::Sub) {
    GlobalValue *GV1, *GV2;
    APInt Offs1, Offs2;

    if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, DL))
      if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, DL) && GV1 == GV2) {
        unsigned OpSize = DL.getTypeSizeInBits(Op0->getType());

        // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
        // PtrToInt may change the bitwidth, so we have to convert to the right
        // size first.
        return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) -
                                                Offs2.zextOrTrunc(OpSize));
      }
  }

  return nullptr;
}

/// If array indices are not pointer-sized integers, explicitly cast them so
/// that they aren't implicitly casted by the getelementptr.
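/// For example, on a target with a 64-bit index type an 'i16 3' array index
/// should be sign-extended to 'i64 3' so that every index in the GEP uses the
/// index-sized integer type.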
Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
                         Type *ResultTy, GEPNoWrapFlags NW,
                         std::optional<ConstantRange> InRange,
                         const DataLayout &DL, const TargetLibraryInfo *TLI) {
  Type *IntIdxTy = DL.getIndexType(ResultTy);
  Type *IntIdxScalarTy = IntIdxTy->getScalarType();

  bool Any = false;
  SmallVector<Constant*, 32> NewIdxs;
  for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
    if ((i == 1 ||
         !isa<StructType>(GetElementPtrInst::getIndexedType(
             SrcElemTy, Ops.slice(1, i - 1)))) &&
        Ops[i]->getType()->getScalarType() != IntIdxScalarTy) {
      Any = true;
      Type *NewType =
          Ops[i]->getType()->isVectorTy() ? IntIdxTy : IntIdxScalarTy;
      Constant *NewIdx = ConstantFoldCastOperand(
          CastInst::getCastOpcode(Ops[i], true, NewType, true), Ops[i], NewType,
          DL);
      if (!NewIdx)
        return nullptr;
      NewIdxs.push_back(NewIdx);
    } else
      NewIdxs.push_back(Ops[i]);
  }

  if (!Any)
    return nullptr;

  Constant *C =
      ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], NewIdxs, NW, InRange);
  return ConstantFoldConstant(C, DL, TLI);
}

/// If we can symbolically evaluate the GEP constant expression, do so.
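/// For example, 'getelementptr inbounds [10 x i32], ptr @g, i64 0, i64 4'
/// should fold to 'getelementptr inbounds i8, ptr @g, i64 16', i.e. a single
/// ptradd carrying the accumulated byte offset.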
Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
                                  ArrayRef<Constant *> Ops,
                                  const DataLayout &DL,
                                  const TargetLibraryInfo *TLI) {
  Type *SrcElemTy = GEP->getSourceElementType();
  Type *ResTy = GEP->getType();
  if (!SrcElemTy->isSized() || isa<ScalableVectorType>(SrcElemTy))
    return nullptr;

  if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy, GEP->getNoWrapFlags(),
                                   GEP->getInRange(), DL, TLI))
    return C;

  Constant *Ptr = Ops[0];
  if (!Ptr->getType()->isPointerTy())
    return nullptr;

  Type *IntIdxTy = DL.getIndexType(Ptr->getType());

  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
    if (!isa<ConstantInt>(Ops[i]))
      return nullptr;

  unsigned BitWidth = DL.getTypeSizeInBits(IntIdxTy);
  APInt Offset = APInt(
      BitWidth,
      DL.getIndexedOffsetInType(
          SrcElemTy, ArrayRef((Value *const *)Ops.data() + 1, Ops.size() - 1)));

  std::optional<ConstantRange> InRange = GEP->getInRange();
  if (InRange)
    InRange = InRange->sextOrTrunc(BitWidth);

  // If this is a GEP of a GEP, fold it all into a single GEP.
  GEPNoWrapFlags NW = GEP->getNoWrapFlags();
  bool Overflow = false;
  while (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
    NW &= GEP->getNoWrapFlags();

    SmallVector<Value *, 4> NestedOps(llvm::drop_begin(GEP->operands()));

    // Do not try to incorporate the sub-GEP if some index is not a number.
    bool AllConstantInt = true;
    for (Value *NestedOp : NestedOps)
      if (!isa<ConstantInt>(NestedOp)) {
        AllConstantInt = false;
        break;
      }
    if (!AllConstantInt)
      break;

    // TODO: Try to intersect two inrange attributes?
    if (!InRange) {
      InRange = GEP->getInRange();
      if (InRange)
        // Adjust inrange by offset until now.
        InRange = InRange->sextOrTrunc(BitWidth).subtract(Offset);
    }

    Ptr = cast<Constant>(GEP->getOperand(0));
    SrcElemTy = GEP->getSourceElementType();
    Offset = Offset.sadd_ov(
        APInt(BitWidth, DL.getIndexedOffsetInType(SrcElemTy, NestedOps)),
        Overflow);
  }

  // Preserving nusw (without inbounds) also requires that the offset
  // additions did not overflow.
  if (NW.hasNoUnsignedSignedWrap() && !NW.isInBounds() && Overflow)
    NW = NW.withoutNoUnsignedSignedWrap();

  // If the base value for this address is a literal integer value, fold the
  // getelementptr to the resulting integer value casted to the pointer type.
  APInt BasePtr(BitWidth, 0);
  if (auto *CE = dyn_cast<ConstantExpr>(Ptr)) {
    if (CE->getOpcode() == Instruction::IntToPtr) {
      if (auto *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
        BasePtr = Base->getValue().zextOrTrunc(BitWidth);
    }
  }

  auto *PTy = cast<PointerType>(Ptr->getType());
  if ((Ptr->isNullValue() || BasePtr != 0) &&
      !DL.isNonIntegralPointerType(PTy)) {
    Constant *C = ConstantInt::get(Ptr->getContext(), Offset + BasePtr);
    return ConstantExpr::getIntToPtr(C, ResTy);
  }

  // Try to infer inbounds for GEPs of globals.
  // TODO(gep_nowrap): Also infer nuw flag.
  if (!NW.isInBounds() && Offset.isNonNegative()) {
    bool CanBeNull, CanBeFreed;
    uint64_t DerefBytes =
        Ptr->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
    if (DerefBytes != 0 && !CanBeNull && Offset.sle(DerefBytes))
      NW |= GEPNoWrapFlags::inBounds();
  }

  // Otherwise canonicalize this to a single ptradd.
  LLVMContext &Ctx = Ptr->getContext();
  return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ctx), Ptr,
                                        ConstantInt::get(Ctx, Offset), NW,
                                        InRange);
}

/// Attempt to constant fold an instruction with the
/// specified opcode and operands. If successful, the constant result is
/// returned, if not, null is returned. Note that this function can fail when
/// attempting to fold instructions like loads and stores, which have no
/// constant expression form.
Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
                                       ArrayRef<Constant *> Ops,
                                       const DataLayout &DL,
                                       const TargetLibraryInfo *TLI,
                                       bool AllowNonDeterministic) {
  Type *DestTy = InstOrCE->getType();

  if (Instruction::isUnaryOp(Opcode))
    return ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL);

  if (Instruction::isBinaryOp(Opcode)) {
    switch (Opcode) {
    default:
      break;
    case Instruction::FAdd:
    case Instruction::FSub:
    case Instruction::FMul:
    case Instruction::FDiv:
    case Instruction::FRem:
      // Handle floating point instructions separately to account for denormals
      // TODO: If a constant expression is being folded rather than an
      // instruction, denormals will not be flushed/treated as zero
      if (const auto *I = dyn_cast<Instruction>(InstOrCE)) {
        return ConstantFoldFPInstOperands(Opcode, Ops[0], Ops[1], DL, I,
                                          AllowNonDeterministic);
      }
    }
    return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL);
  }

  if (Instruction::isCast(Opcode))
    return ConstantFoldCastOperand(Opcode, Ops[0], DestTy, DL);

  if (auto *GEP = dyn_cast<GEPOperator>(InstOrCE)) {
    Type *SrcElemTy = GEP->getSourceElementType();
    if (!ConstantExpr::isSupportedGetElementPtr(SrcElemTy))
      return nullptr;

    if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI))
      return C;

    return ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], Ops.slice(1),
                                          GEP->getNoWrapFlags(),
                                          GEP->getInRange());
  }

  if (auto *CE = dyn_cast<ConstantExpr>(InstOrCE))
    return CE->getWithOperands(Ops);

  switch (Opcode) {
  default: return nullptr;
  case Instruction::ICmp:
  case Instruction::FCmp: {
    auto *C = cast<CmpInst>(InstOrCE);
    return ConstantFoldCompareInstOperands(C->getPredicate(), Ops[0], Ops[1],
                                           DL, TLI, C);
  }
  case Instruction::Freeze:
    return isGuaranteedNotToBeUndefOrPoison(Ops[0]) ? Ops[0] : nullptr;
  case Instruction::Call:
    if (auto *F = dyn_cast<Function>(Ops.back())) {
      const auto *Call = cast<CallBase>(InstOrCE);
      if (canConstantFoldCallTo(Call, F))
        return ConstantFoldCall(Call, F, Ops.slice(0, Ops.size() - 1), TLI,
                                AllowNonDeterministic);
    }
    return nullptr;
  case Instruction::Select:
    return ConstantFoldSelectInstruction(Ops[0], Ops[1], Ops[2]);
  case Instruction::ExtractElement:
    return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
  case Instruction::ExtractValue:
    return ConstantFoldExtractValueInstruction(
        Ops[0], cast<ExtractValueInst>(InstOrCE)->getIndices());
  case Instruction::InsertElement:
    return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
  case Instruction::InsertValue:
    return ConstantFoldInsertValueInstruction(
        Ops[0], Ops[1], cast<InsertValueInst>(InstOrCE)->getIndices());
  case Instruction::ShuffleVector:
    return ConstantExpr::getShuffleVector(
        Ops[0], Ops[1], cast<ShuffleVectorInst>(InstOrCE)->getShuffleMask());
  case Instruction::Load: {
    const auto *LI = dyn_cast<LoadInst>(InstOrCE);
    if (LI->isVolatile())
      return nullptr;
    return ConstantFoldLoadFromConstPtr(Ops[0], LI->getType(), DL);
  }
  }
}

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// Constant Folding public APIs
//===----------------------------------------------------------------------===//

namespace {

Constant *
ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,
                         const TargetLibraryInfo *TLI,
                         SmallDenseMap<Constant *, Constant *> &FoldedOps) {
  if (!isa<ConstantVector>(C) && !isa<ConstantExpr>(C))
    return const_cast<Constant *>(C);

  SmallVector<Constant *, 8> Ops;
  for (const Use &OldU : C->operands()) {
    Constant *OldC = cast<Constant>(&OldU);
    Constant *NewC = OldC;
    // Recursively fold the ConstantExpr's operands. If we have already folded
    // a ConstantExpr, we don't have to process it again.
    if (isa<ConstantVector>(OldC) || isa<ConstantExpr>(OldC)) {
      auto It = FoldedOps.find(OldC);
      if (It == FoldedOps.end()) {
        NewC = ConstantFoldConstantImpl(OldC, DL, TLI, FoldedOps);
        FoldedOps.insert({OldC, NewC});
      } else {
        NewC = It->second;
      }
    }
    Ops.push_back(NewC);
  }

  if (auto *CE = dyn_cast<ConstantExpr>(C)) {
    if (Constant *Res = ConstantFoldInstOperandsImpl(
            CE, CE->getOpcode(), Ops, DL, TLI, /*AllowNonDeterministic=*/true))
      return Res;
    return const_cast<Constant *>(C);
  }

  assert(isa<ConstantVector>(C));
  return ConstantVector::get(Ops);
}

} // end anonymous namespace

Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
                                        const TargetLibraryInfo *TLI) {
  // Handle PHI nodes quickly here...
  if (auto *PN = dyn_cast<PHINode>(I)) {
    Constant *CommonValue = nullptr;

    SmallDenseMap<Constant *, Constant *> FoldedOps;
    for (Value *Incoming : PN->incoming_values()) {
      // If the incoming value is undef then skip it. Note that while we could
      // skip the value if it is equal to the phi node itself we choose not to
      // because that would break the rule that constant folding only applies if
      // all operands are constants.
      if (isa<UndefValue>(Incoming))
        continue;
      // If the incoming value is not a constant, then give up.
      auto *C = dyn_cast<Constant>(Incoming);
      if (!C)
        return nullptr;
      // Fold the PHI's operands.
      C = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
      // If the incoming value is a different constant to
      // the one we saw previously, then give up.
      if (CommonValue && C != CommonValue)
        return nullptr;
      CommonValue = C;
    }

    // If we reach here, all incoming values are the same constant or undef.
    return CommonValue ? CommonValue : UndefValue::get(PN->getType());
  }

  // Scan the operand list, checking to see if they are all constants, if so,
  // hand off to ConstantFoldInstOperandsImpl.
  if (!all_of(I->operands(), [](Use &U) { return isa<Constant>(U); }))
    return nullptr;

  SmallDenseMap<Constant *, Constant *> FoldedOps;
  SmallVector<Constant *, 8> Ops;
  for (const Use &OpU : I->operands()) {
    auto *Op = cast<Constant>(&OpU);
    // Fold the Instruction's operands.
    Op = ConstantFoldConstantImpl(Op, DL, TLI, FoldedOps);
    Ops.push_back(Op);
  }

  return ConstantFoldInstOperands(I, Ops, DL, TLI);
}

Constant *llvm::ConstantFoldConstant(const Constant *C, const DataLayout &DL,
                                     const TargetLibraryInfo *TLI) {
  SmallDenseMap<Constant *, Constant *> FoldedOps;
  return ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
}

Constant *llvm::ConstantFoldInstOperands(Instruction *I,
                                         ArrayRef<Constant *> Ops,
                                         const DataLayout &DL,
                                         const TargetLibraryInfo *TLI,
                                         bool AllowNonDeterministic) {
  return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI,
                                      AllowNonDeterministic);
}

Constant *llvm::ConstantFoldCompareInstOperands(
    unsigned IntPredicate, Constant *Ops0, Constant *Ops1, const DataLayout &DL,
    const TargetLibraryInfo *TLI, const Instruction *I) {
  CmpInst::Predicate Predicate = (CmpInst::Predicate)IntPredicate;
  // fold: icmp (inttoptr x), null         -> icmp x, 0
  // fold: icmp null, (inttoptr x)         -> icmp 0, x
  // fold: icmp (ptrtoint x), 0            -> icmp x, null
  // fold: icmp 0, (ptrtoint x)            -> icmp null, x
  // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
  // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
  //
  // FIXME: The following comment is out of date and the DataLayout is here now.
  // ConstantExpr::getCompare cannot do this, because it doesn't have DL
  // around to know if bit truncation is happening.
  if (auto *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
    if (Ops1->isNullValue()) {
      if (CE0->getOpcode() == Instruction::IntToPtr) {
        Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
        // Convert the integer value to the right size to ensure we get the
        // proper extension or truncation.
        if (Constant *C = ConstantFoldIntegerCast(CE0->getOperand(0), IntPtrTy,
                                                  /*IsSigned*/ false, DL)) {
          Constant *Null = Constant::getNullValue(C->getType());
          return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
        }
      }

      // Only do this transformation if the int is intptrty in size, otherwise
      // there is a truncation or extension that we aren't modeling.
      if (CE0->getOpcode() == Instruction::PtrToInt) {
        Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
        if (CE0->getType() == IntPtrTy) {
          Constant *C = CE0->getOperand(0);
          Constant *Null = Constant::getNullValue(C->getType());
          return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
        }
      }
    }

    if (auto *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
      if (CE0->getOpcode() == CE1->getOpcode()) {
        if (CE0->getOpcode() == Instruction::IntToPtr) {
          Type *IntPtrTy = DL.getIntPtrType(CE0->getType());

          // Convert the integer value to the right size to ensure we get the
          // proper extension or truncation.
          Constant *C0 = ConstantFoldIntegerCast(CE0->getOperand(0), IntPtrTy,
                                                 /*IsSigned*/ false, DL);
          Constant *C1 = ConstantFoldIntegerCast(CE1->getOperand(0), IntPtrTy,
                                                 /*IsSigned*/ false, DL);
          if (C0 && C1)
            return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI);
        }

        // Only do this transformation if the int is intptrty in size, otherwise
        // there is a truncation or extension that we aren't modeling.
        if (CE0->getOpcode() == Instruction::PtrToInt) {
          Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
          if (CE0->getType() == IntPtrTy &&
              CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) {
            return ConstantFoldCompareInstOperands(
                Predicate, CE0->getOperand(0), CE1->getOperand(0), DL, TLI);
          }
        }
      }
    }

    // Convert pointer comparison (base+offset1) pred (base+offset2) into
    // offset1 pred offset2, for the case where the offset is inbounds. This
    // only works for equality and unsigned comparison, as inbounds permits
    // crossing the sign boundary. However, the offset comparison itself is
    // signed.
    if (Ops0->getType()->isPointerTy() && !ICmpInst::isSigned(Predicate)) {
      unsigned IndexWidth = DL.getIndexTypeSizeInBits(Ops0->getType());
      APInt Offset0(IndexWidth, 0);
      Value *Stripped0 =
          Ops0->stripAndAccumulateInBoundsConstantOffsets(DL, Offset0);
      APInt Offset1(IndexWidth, 0);
      Value *Stripped1 =
          Ops1->stripAndAccumulateInBoundsConstantOffsets(DL, Offset1);
      if (Stripped0 == Stripped1)
        return ConstantInt::getBool(
            Ops0->getContext(),
            ICmpInst::compare(Offset0, Offset1,
                              ICmpInst::getSignedPredicate(Predicate)));
    }
  } else if (isa<ConstantExpr>(Ops1)) {
    // If RHS is a constant expression, but the left side isn't, swap the
    // operands and try again.
    Predicate = ICmpInst::getSwappedPredicate(Predicate);
    return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI);
  }

  // Flush any denormal constant float input according to denormal handling
  // mode.
  Ops0 = FlushFPConstant(Ops0, I, /* IsOutput */ false);
  if (!Ops0)
    return nullptr;
  Ops1 = FlushFPConstant(Ops1, I, /* IsOutput */ false);
  if (!Ops1)
    return nullptr;

  return ConstantFoldCompareInstruction(Predicate, Ops0, Ops1);
}

Constant *llvm::ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,
                                           const DataLayout &DL) {
  assert(Instruction::isUnaryOp(Opcode));

  return ConstantFoldUnaryInstruction(Opcode, Op);
}

Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
                                             Constant *RHS,
                                             const DataLayout &DL) {
  assert(Instruction::isBinaryOp(Opcode));
  if (isa<ConstantExpr>(LHS) || isa<ConstantExpr>(RHS))
    if (Constant *C = SymbolicallyEvaluateBinop(Opcode, LHS, RHS, DL))
      return C;

  if (ConstantExpr::isDesirableBinOp(Opcode))
    return ConstantExpr::get(Opcode, LHS, RHS);
  return ConstantFoldBinaryInstruction(Opcode, LHS, RHS);
}

Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *I,
                                bool IsOutput) {
  if (!I || !I->getParent() || !I->getFunction())
    return Operand;

  ConstantFP *CFP = dyn_cast<ConstantFP>(Operand);
  if (!CFP)
    return Operand;

  const APFloat &APF = CFP->getValueAPF();
  // TODO: Should this canonicalize nans?
  if (!APF.isDenormal())
    return Operand;

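  // A denormal operand is rewritten according to the function's
  // "denormal-fp-math" mode. E.g. under 'preserve-sign' the float denormal
  // -0x1p-149 should be flushed to -0.0, and under 'positive-zero' to +0.0.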
  Type *Ty = CFP->getType();
  DenormalMode DenormMode =
      I->getFunction()->getDenormalMode(Ty->getFltSemantics());
  DenormalMode::DenormalModeKind Mode =
      IsOutput ? DenormMode.Output : DenormMode.Input;
  switch (Mode) {
  default:
    llvm_unreachable("unknown denormal mode");
  case DenormalMode::Dynamic:
    return nullptr;
  case DenormalMode::IEEE:
    return Operand;
  case DenormalMode::PreserveSign:
    if (APF.isDenormal()) {
      return ConstantFP::get(
          Ty->getContext(),
          APFloat::getZero(Ty->getFltSemantics(), APF.isNegative()));
    }
    return Operand;
  case DenormalMode::PositiveZero:
    if (APF.isDenormal()) {
      return ConstantFP::get(Ty->getContext(),
                             APFloat::getZero(Ty->getFltSemantics(), false));
    }
    return Operand;
  }
  return Operand;
}

Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
                                           Constant *RHS, const DataLayout &DL,
                                           const Instruction *I,
                                           bool AllowNonDeterministic) {
  if (Instruction::isBinaryOp(Opcode)) {
    // Flush denormal inputs if needed.
    Constant *Op0 = FlushFPConstant(LHS, I, /* IsOutput */ false);
    if (!Op0)
      return nullptr;
    Constant *Op1 = FlushFPConstant(RHS, I, /* IsOutput */ false);
    if (!Op1)
      return nullptr;

    // If nsz or an algebraic FMF flag is set, the result of the FP operation
    // may change due to future optimization. Don't constant fold them if
    // non-deterministic results are not allowed.
    if (!AllowNonDeterministic)
      if (auto *FP = dyn_cast_or_null<FPMathOperator>(I))
        if (FP->hasNoSignedZeros() || FP->hasAllowReassoc() ||
            FP->hasAllowContract() || FP->hasAllowReciprocal())
          return nullptr;

    // Calculate constant result.
    Constant *C = ConstantFoldBinaryOpOperands(Opcode, Op0, Op1, DL);
    if (!C)
      return nullptr;

    // Flush denormal output if needed.
    C = FlushFPConstant(C, I, /* IsOutput */ true);
    if (!C)
      return nullptr;

    // The precise NaN value is non-deterministic.
    if (!AllowNonDeterministic && C->isNaN())
      return nullptr;

    return C;
  }
  // If instruction lacks a parent/function and the denormal mode cannot be
  // determined, use the default (IEEE).
  return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
}

Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
                                        Type *DestTy, const DataLayout &DL) {
  assert(Instruction::isCast(Opcode));
  switch (Opcode) {
  default:
    llvm_unreachable("Missing case");
  case Instruction::PtrToInt:
    if (auto *CE = dyn_cast<ConstantExpr>(C)) {
      Constant *FoldedValue = nullptr;
      // If the input is an inttoptr, eliminate the pair. This requires knowing
      // the width of a pointer, so it can't be done in ConstantExpr::getCast.
      if (CE->getOpcode() == Instruction::IntToPtr) {
        // zext/trunc the inttoptr to pointer size.
        FoldedValue = ConstantFoldIntegerCast(CE->getOperand(0),
                                              DL.getIntPtrType(CE->getType()),
                                              /*IsSigned=*/false, DL);
      } else if (auto *GEP = dyn_cast<GEPOperator>(CE)) {
        // If we have a GEP, we can perform the following folds:
        // (ptrtoint (gep null, x)) -> x
        // (ptrtoint (gep (gep null, x), y) -> x + y, etc.
        unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
        APInt BaseOffset(BitWidth, 0);
        auto *Base = cast<Constant>(GEP->stripAndAccumulateConstantOffsets(
            DL, BaseOffset, /*AllowNonInbounds=*/true));
        if (Base->isNullValue()) {
          FoldedValue = ConstantInt::get(CE->getContext(), BaseOffset);
        } else {
          // ptrtoint (gep i8, Ptr, (sub 0, V)) -> sub (ptrtoint Ptr), V
          if (GEP->getNumIndices() == 1 &&
              GEP->getSourceElementType()->isIntegerTy(8)) {
            auto *Ptr = cast<Constant>(GEP->getPointerOperand());
            auto *Sub = dyn_cast<ConstantExpr>(GEP->getOperand(1));
            Type *IntIdxTy = DL.getIndexType(Ptr->getType());
            if (Sub && Sub->getType() == IntIdxTy &&
                Sub->getOpcode() == Instruction::Sub &&
                Sub->getOperand(0)->isNullValue())
              FoldedValue = ConstantExpr::getSub(
                  ConstantExpr::getPtrToInt(Ptr, IntIdxTy), Sub->getOperand(1));
          }
        }
      }
      if (FoldedValue) {
        // Do a zext or trunc to get to the ptrtoint dest size.
        return ConstantFoldIntegerCast(FoldedValue, DestTy, /*IsSigned=*/false,
                                       DL);
      }
    }
    break;
  case Instruction::IntToPtr:
    // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
    // the int size is >= the ptr size and the address spaces are the same.
    // This requires knowing the width of a pointer, so it can't be done in
    // ConstantExpr::getCast.
    if (auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (CE->getOpcode() == Instruction::PtrToInt) {
        Constant *SrcPtr = CE->getOperand(0);
        unsigned SrcPtrSize = DL.getPointerTypeSizeInBits(SrcPtr->getType());
        unsigned MidIntSize = CE->getType()->getScalarSizeInBits();

        if (MidIntSize >= SrcPtrSize) {
          unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();
          if (SrcAS == DestTy->getPointerAddressSpace())
            return FoldBitCast(CE->getOperand(0), DestTy, DL);
        }
      }
    }
    break;
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::AddrSpaceCast:
    break;
  case Instruction::BitCast:
    return FoldBitCast(C, DestTy, DL);
  }

  if (ConstantExpr::isDesirableCastOp(Opcode))
    return ConstantExpr::getCast(Opcode, C, DestTy);
  return ConstantFoldCastInstruction(Opcode, C, DestTy);
}

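// Helper that picks trunc/sext/zext based on the relative scalar bit widths.
// For example, casting i8 -1 to i32 with IsSigned = true should fold to
// i32 -1 (sext), while IsSigned = false gives i32 255 (zext).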
Constant *llvm::ConstantFoldIntegerCast(Constant *C, Type *DestTy,
                                        bool IsSigned, const DataLayout &DL) {
  Type *SrcTy = C->getType();
  if (SrcTy == DestTy)
    return C;
  if (SrcTy->getScalarSizeInBits() > DestTy->getScalarSizeInBits())
    return ConstantFoldCastOperand(Instruction::Trunc, C, DestTy, DL);
  if (IsSigned)
    return ConstantFoldCastOperand(Instruction::SExt, C, DestTy, DL);
  return ConstantFoldCastOperand(Instruction::ZExt, C, DestTy, DL);
}

//===----------------------------------------------------------------------===//
//  Constant Folding for Calls
//

bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
  if (Call->isNoBuiltin())
    return false;
  if (Call->getFunctionType() != F->getFunctionType())
    return false;
  switch (F->getIntrinsicID()) {
  // Operations that do not operate on floating-point numbers and do not
  // depend on the FP environment can be folded even in strictfp functions.
  case Intrinsic::bswap:
  case Intrinsic::ctpop:
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::fshl:
  case Intrinsic::fshr:
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
  case Intrinsic::masked_load:
  case Intrinsic::get_active_lane_mask:
  case Intrinsic::abs:
  case Intrinsic::smax:
  case Intrinsic::smin:
  case Intrinsic::umax:
  case Intrinsic::umin:
  case Intrinsic::scmp:
  case Intrinsic::ucmp:
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
  case Intrinsic::sadd_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::bitreverse:
  case Intrinsic::is_constant:
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:
  // Target intrinsics
  case Intrinsic::amdgcn_perm:
  case Intrinsic::amdgcn_wave_reduce_umin:
  case Intrinsic::amdgcn_wave_reduce_umax:
  case Intrinsic::amdgcn_s_wqm:
  case Intrinsic::amdgcn_s_quadmask:
  case Intrinsic::amdgcn_s_bitreplicate:
  case Intrinsic::arm_mve_vctp8:
  case Intrinsic::arm_mve_vctp16:
  case Intrinsic::arm_mve_vctp32:
  case Intrinsic::arm_mve_vctp64:
  case Intrinsic::aarch64_sve_convert_from_svbool:
  // WebAssembly float semantics are always known
  case Intrinsic::wasm_trunc_signed:
  case Intrinsic::wasm_trunc_unsigned:
    return true;

  // Floating point operations cannot be folded in strictfp functions in the
  // general case. They can be folded if the FP environment is known to the
  // compiler.
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::minimum:
  case Intrinsic::maximum:
  case Intrinsic::log:
  case Intrinsic::log2:
  case Intrinsic::log10:
  case Intrinsic::exp:
  case Intrinsic::exp2:
  case Intrinsic::exp10:
  case Intrinsic::sqrt:
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::pow:
  case Intrinsic::powi:
  case Intrinsic::ldexp:
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
  case Intrinsic::frexp:
  case Intrinsic::fptoui_sat:
  case Intrinsic::fptosi_sat:
  case Intrinsic::convert_from_fp16:
  case Intrinsic::convert_to_fp16:
  case Intrinsic::amdgcn_cos:
  case Intrinsic::amdgcn_cubeid:
  case Intrinsic::amdgcn_cubema:
  case Intrinsic::amdgcn_cubesc:
  case Intrinsic::amdgcn_cubetc:
  case Intrinsic::amdgcn_fmul_legacy:
  case Intrinsic::amdgcn_fma_legacy:
  case Intrinsic::amdgcn_fract:
  case Intrinsic::amdgcn_sin:
  // The intrinsics below depend on the rounding mode in MXCSR.
  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64:
  case Intrinsic::x86_avx512_vcvtss2si32:
  case Intrinsic::x86_avx512_vcvtss2si64:
  case Intrinsic::x86_avx512_cvttss2si:
  case Intrinsic::x86_avx512_cvttss2si64:
  case Intrinsic::x86_avx512_vcvtsd2si32:
  case Intrinsic::x86_avx512_vcvtsd2si64:
  case Intrinsic::x86_avx512_cvttsd2si:
  case Intrinsic::x86_avx512_cvttsd2si64:
  case Intrinsic::x86_avx512_vcvtss2usi32:
  case Intrinsic::x86_avx512_vcvtss2usi64:
  case Intrinsic::x86_avx512_cvttss2usi:
  case Intrinsic::x86_avx512_cvttss2usi64:
  case Intrinsic::x86_avx512_vcvtsd2usi32:
  case Intrinsic::x86_avx512_vcvtsd2usi64:
  case Intrinsic::x86_avx512_cvttsd2usi:
  case Intrinsic::x86_avx512_cvttsd2usi64:
    return !Call->isStrictFP();

  // Sign operations are actually bitwise operations; they do not raise
  // exceptions even for SNaNs.
  case Intrinsic::fabs:
  case Intrinsic::copysign:
  case Intrinsic::is_fpclass:
  // Non-constrained variants of rounding operations mean the default FP
  // environment; they can be folded in any case.
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::round:
  case Intrinsic::roundeven:
  case Intrinsic::trunc:
  case Intrinsic::nearbyint:
  case Intrinsic::rint:
  case Intrinsic::canonicalize:
  // Constrained intrinsics can be folded if the FP environment is known to
  // the compiler.
  case Intrinsic::experimental_constrained_fma:
  case Intrinsic::experimental_constrained_fmuladd:
  case Intrinsic::experimental_constrained_fadd:
  case Intrinsic::experimental_constrained_fsub:
  case Intrinsic::experimental_constrained_fmul:
  case Intrinsic::experimental_constrained_fdiv:
  case Intrinsic::experimental_constrained_frem:
  case Intrinsic::experimental_constrained_ceil:
  case Intrinsic::experimental_constrained_floor:
  case Intrinsic::experimental_constrained_round:
  case Intrinsic::experimental_constrained_roundeven:
  case Intrinsic::experimental_constrained_trunc:
  case Intrinsic::experimental_constrained_nearbyint:
  case Intrinsic::experimental_constrained_rint:
  case Intrinsic::experimental_constrained_fcmp:
  case Intrinsic::experimental_constrained_fcmps:
    return true;
  default:
    return false;
  case Intrinsic::not_intrinsic: break;
  }

  if (!F->hasName() || Call->isStrictFP())
    return false;

  // In these cases, the check of the length is required. We don't want to
  // return true for a name like "cos\0blah" which strcmp would return equal
  // to "cos", but has length 8.
  StringRef Name = F->getName();
  switch (Name[0]) {
  default:
    return false;
  case 'a':
    return Name == "acos" || Name == "acosf" ||
           Name == "asin" || Name == "asinf" ||
           Name == "atan" || Name == "atanf" ||
           Name == "atan2" || Name == "atan2f";
  case 'c':
    return Name == "ceil" || Name == "ceilf" ||
           Name == "cos" || Name == "cosf" ||
           Name == "cosh" || Name == "coshf";
  case 'e':
    return Name == "exp" || Name == "expf" ||
           Name == "exp2" || Name == "exp2f";
  case 'f':
    return Name == "fabs" || Name == "fabsf" ||
           Name == "floor" || Name == "floorf" ||
           Name == "fmod" || Name == "fmodf";
  case 'l':
    return Name == "log" || Name == "logf" || Name == "log2" ||
           Name == "log2f" || Name == "log10" || Name == "log10f" ||
           Name == "logl";
  case 'n':
    return Name == "nearbyint" || Name == "nearbyintf";
  case 'p':
    return Name == "pow" || Name == "powf";
  case 'r':
    return Name == "remainder" || Name == "remainderf" ||
           Name == "rint" || Name == "rintf" ||
           Name == "round" || Name == "roundf";
  case 's':
    return Name == "sin" || Name == "sinf" ||
           Name == "sinh" || Name == "sinhf" ||
           Name == "sqrt" || Name == "sqrtf";
  case 't':
    return Name == "tan" || Name == "tanf" ||
           Name == "tanh" || Name == "tanhf" ||
           Name == "trunc" || Name == "truncf";
  case '_':
    // Check for various function names that get used for the math functions
    // when the header files are preprocessed with the macro
    // __FINITE_MATH_ONLY__ enabled.
    // The '12' here is the length of the shortest name that can match.
    // We need to check the size before looking at Name[1] and Name[2]
    // so we may as well check a limit that will eliminate mismatches.
    if (Name.size() < 12 || Name[1] != '_')
      return false;
    switch (Name[2]) {
    default:
      return false;
    case 'a':
      return Name == "__acos_finite" || Name == "__acosf_finite" ||
             Name == "__asin_finite" || Name == "__asinf_finite" ||
             Name == "__atan2_finite" || Name == "__atan2f_finite";
    case 'c':
      return Name == "__cosh_finite" || Name == "__coshf_finite";
    case 'e':
      return Name == "__exp_finite" || Name == "__expf_finite" ||
             Name == "__exp2_finite" || Name == "__exp2f_finite";
    case 'l':
      return Name == "__log_finite" || Name == "__logf_finite" ||
             Name == "__log10_finite" || Name == "__log10f_finite";
    case 'p':
      return Name == "__pow_finite" || Name == "__powf_finite";
    case 's':
      return Name == "__sinh_finite" || Name == "__sinhf_finite";
    }
  }
}
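
// For example, a call such as `%r = call double @cos(double 0.0)` in a
// non-strictfp function passes the checks above (recognized name, matching
// prototype), so the folders below may evaluate it at compile time; the same
// call inside a strictfp function is rejected here.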

namespace {

Constant *GetConstantFoldFPValue(double V, Type *Ty) {
  if (Ty->isHalfTy() || Ty->isFloatTy()) {
    APFloat APF(V);
    bool unused;
    APF.convert(Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &unused);
    return ConstantFP::get(Ty->getContext(), APF);
  }
  if (Ty->isDoubleTy())
    return ConstantFP::get(Ty->getContext(), APFloat(V));
  llvm_unreachable("Can only constant fold half/float/double");
}

#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
Constant *GetConstantFoldFPValue128(float128 V, Type *Ty) {
  if (Ty->isFP128Ty())
    return ConstantFP::get(Ty, V);
  llvm_unreachable("Can only constant fold fp128");
}
#endif

/// Clear the floating-point exception state.
inline void llvm_fenv_clearexcept() {
#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT
  feclearexcept(FE_ALL_EXCEPT);
#endif
  errno = 0;
}

/// Test if a floating-point exception was raised.
inline bool llvm_fenv_testexcept() {
  int errno_val = errno;
  if (errno_val == ERANGE || errno_val == EDOM)
    return true;
#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT
  if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT))
    return true;
#endif
  return false;
}

Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
                         Type *Ty) {
  llvm_fenv_clearexcept();
  double Result = NativeFP(V.convertToDouble());
  if (llvm_fenv_testexcept()) {
    llvm_fenv_clearexcept();
    return nullptr;
  }

  return GetConstantFoldFPValue(Result, Ty);
}

#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
Constant *ConstantFoldFP128(long double (*NativeFP)(long double),
                            const APFloat &V, Type *Ty) {
  llvm_fenv_clearexcept();
  float128 Result = NativeFP(V.convertToQuad());
  if (llvm_fenv_testexcept()) {
    llvm_fenv_clearexcept();
    return nullptr;
  }

  return GetConstantFoldFPValue128(Result, Ty);
}
#endif

Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
                               const APFloat &V, const APFloat &W, Type *Ty) {
  llvm_fenv_clearexcept();
  double Result = NativeFP(V.convertToDouble(), W.convertToDouble());
  if (llvm_fenv_testexcept()) {
    llvm_fenv_clearexcept();
    return nullptr;
  }

  return GetConstantFoldFPValue(Result, Ty);
}
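
// As a concrete illustration of the guard above: evaluating log(-1.0) through
// ConstantFoldFP sets EDOM (or raises an invalid-operation flag) in the host
// libm on typical hosts, so llvm_fenv_testexcept() fires and the fold is
// rejected with nullptr, whereas log(2.0) evaluates cleanly and is folded.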

Constant *constantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) {
  FixedVectorType *VT = dyn_cast<FixedVectorType>(Op->getType());
  if (!VT)
    return nullptr;

  // This isn't strictly necessary, but handle the special/common case of zero:
  // all integer reductions of a zero input produce zero.
  if (isa<ConstantAggregateZero>(Op))
    return ConstantInt::get(VT->getElementType(), 0);

  // This is the same as the underlying binops - poison propagates.
  if (isa<PoisonValue>(Op) || Op->containsPoisonElement())
    return PoisonValue::get(VT->getElementType());

  // TODO: Handle undef.
  if (!isa<ConstantVector>(Op) && !isa<ConstantDataVector>(Op))
    return nullptr;

  auto *EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(0U));
  if (!EltC)
    return nullptr;

  APInt Acc = EltC->getValue();
  for (unsigned I = 1, E = VT->getNumElements(); I != E; I++) {
    if (!(EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(I))))
      return nullptr;
    const APInt &X = EltC->getValue();
    switch (IID) {
    case Intrinsic::vector_reduce_add:
      Acc = Acc + X;
      break;
    case Intrinsic::vector_reduce_mul:
      Acc = Acc * X;
      break;
    case Intrinsic::vector_reduce_and:
      Acc = Acc & X;
      break;
    case Intrinsic::vector_reduce_or:
      Acc = Acc | X;
      break;
    case Intrinsic::vector_reduce_xor:
      Acc = Acc ^ X;
      break;
    case Intrinsic::vector_reduce_smin:
      Acc = APIntOps::smin(Acc, X);
      break;
    case Intrinsic::vector_reduce_smax:
      Acc = APIntOps::smax(Acc, X);
      break;
    case Intrinsic::vector_reduce_umin:
      Acc = APIntOps::umin(Acc, X);
      break;
    case Intrinsic::vector_reduce_umax:
      Acc = APIntOps::umax(Acc, X);
      break;
    }
  }

  return ConstantInt::get(Op->getContext(), Acc);
}
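
// For example, llvm.vector.reduce.add on <4 x i32> <i32 1, i32 2, i32 3,
// i32 4> folds to i32 10, and any input containing a poison element folds to
// poison, matching the behaviour of the underlying scalar binops.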

/// Attempt to fold an SSE floating point to integer conversion of a constant
/// floating point. If roundTowardZero is false, the default IEEE rounding is
/// used (toward nearest, ties to even). This matches the behavior of the
/// non-truncating SSE instructions in the default rounding mode. The desired
/// integer type Ty is used to select how many bits are available for the
/// result. Returns null if the conversion cannot be performed, otherwise
/// returns the Constant value resulting from the conversion.
Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero,
                                      Type *Ty, bool IsSigned) {
  // All of these conversion intrinsics form an integer of at most 64 bits.
  unsigned ResultWidth = Ty->getIntegerBitWidth();
  assert(ResultWidth <= 64 &&
         "Can only constant fold conversions to 64 and 32 bit ints");

  uint64_t UIntVal;
  bool isExact = false;
  APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero
                                              : APFloat::rmNearestTiesToEven;
  APFloat::opStatus status =
      Val.convertToInteger(MutableArrayRef(UIntVal), ResultWidth,
                           IsSigned, mode, &isExact);
  if (status != APFloat::opOK &&
      (!roundTowardZero || status != APFloat::opInexact))
    return nullptr;
  return ConstantInt::get(Ty, UIntVal, IsSigned);
}
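
// For example, with roundTowardZero == false (cvtss2si semantics) the value
// 2.5 converts to 2 under round-to-nearest-even, while the truncating form
// (roundTowardZero == true, cvttss2si semantics) converts 2.7 to 2 and -2.7
// to -2.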

double getValueAsDouble(ConstantFP *Op) {
  Type *Ty = Op->getType();

  if (Ty->isBFloatTy() || Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
    return Op->getValueAPF().convertToDouble();

  bool unused;
  APFloat APF = Op->getValueAPF();
  APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &unused);
  return APF.convertToDouble();
}

static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
  if (auto *CI = dyn_cast<ConstantInt>(Op)) {
    C = &CI->getValue();
    return true;
  }
  if (isa<UndefValue>(Op)) {
    C = nullptr;
    return true;
  }
  return false;
}

/// Checks if the given intrinsic call, which evaluates to constant, is allowed
/// to be folded.
///
/// \param CI Constrained intrinsic call.
/// \param St Exception flags raised during constant evaluation.
static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI,
                               APFloat::opStatus St) {
  std::optional<RoundingMode> ORM = CI->getRoundingMode();
  std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();

  // If the operation does not change exception status flags, it is safe
  // to fold.
  if (St == APFloat::opStatus::opOK)
    return true;

  // If evaluation raised an FP exception, the result can depend on the
  // rounding mode. If the latter is unknown, folding is not possible.
  if (ORM && *ORM == RoundingMode::Dynamic)
    return false;

  // If FP exceptions are ignored, fold the call, even if such an exception is
  // raised.
  if (EB && *EB != fp::ExceptionBehavior::ebStrict)
    return true;

  // Leave the calculation for runtime so that exception flags are set
  // correctly in hardware.
  return false;
}

/// Returns the rounding mode that should be used for constant evaluation.
static RoundingMode
getEvaluationRoundingMode(const ConstrainedFPIntrinsic *CI) {
  std::optional<RoundingMode> ORM = CI->getRoundingMode();
  if (!ORM || *ORM == RoundingMode::Dynamic)
    // Even if the rounding mode is unknown, try evaluating the operation.
    // If it does not raise an inexact exception, rounding was not applied,
    // so the result is exact and does not depend on the rounding mode.
    // Whether other FP exceptions are raised does not depend on the rounding
    // mode either.
    return RoundingMode::NearestTiesToEven;
  return *ORM;
}
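
// For example, a constrained fadd of 1.0 and 2.0 evaluates exactly (opOK), so
// mayFoldConstrained() allows the fold regardless of the rounding mode or
// exception behaviour; an inexact result under "fpexcept.strict" with a known
// rounding mode is left for runtime so the hardware flags are raised.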

/// Try to constant fold llvm.canonicalize for the given caller and value.
static Constant *constantFoldCanonicalize(const Type *Ty, const CallBase *CI,
                                          const APFloat &Src) {
  // Zero, positive and negative, is always OK to fold.
  if (Src.isZero()) {
    // Get a fresh 0, since ppc_fp128 does have non-canonical zeros.
    return ConstantFP::get(
        CI->getContext(),
        APFloat::getZero(Src.getSemantics(), Src.isNegative()));
  }

  if (!Ty->isIEEELikeFPTy())
    return nullptr;

  // Zero is always canonical and the sign must be preserved.
  //
  // Denorms and nans may have special encodings, but it should be OK to fold a
  // totally average number.
  if (Src.isNormal() || Src.isInfinity())
    return ConstantFP::get(CI->getContext(), Src);

  if (Src.isDenormal() && CI->getParent() && CI->getFunction()) {
    DenormalMode DenormMode =
        CI->getFunction()->getDenormalMode(Src.getSemantics());

    if (DenormMode == DenormalMode::getIEEE())
      return ConstantFP::get(CI->getContext(), Src);

    if (DenormMode.Input == DenormalMode::Dynamic)
      return nullptr;

    // If we know if either input or output is flushed, we can fold.
    if ((DenormMode.Input == DenormalMode::Dynamic &&
         DenormMode.Output == DenormalMode::IEEE) ||
        (DenormMode.Input == DenormalMode::IEEE &&
         DenormMode.Output == DenormalMode::Dynamic))
      return nullptr;

    bool IsPositive =
        (!Src.isNegative() || DenormMode.Input == DenormalMode::PositiveZero ||
         (DenormMode.Output == DenormalMode::PositiveZero &&
          DenormMode.Input == DenormalMode::IEEE));

    return ConstantFP::get(CI->getContext(),
                           APFloat::getZero(Src.getSemantics(), !IsPositive));
  }

  return nullptr;
}
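
// For example, canonicalize(2.0) and canonicalize(-inf) fold to themselves,
// canonicalize(-0.0) folds to a freshly built -0.0, and a denormal input only
// folds when the caller's denormal mode is known (IEEE keeps the value,
// preserve-sign/positive-zero flush it to an appropriately signed zero).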

static Constant *ConstantFoldScalarCall1(StringRef Name,
                                         Intrinsic::ID IntrinsicID,
                                         Type *Ty,
                                         ArrayRef<Constant *> Operands,
                                         const TargetLibraryInfo *TLI,
                                         const CallBase *Call) {
  assert(Operands.size() == 1 && "Wrong number of operands.");

  if (IntrinsicID == Intrinsic::is_constant) {
    // We know we have a "Constant" argument. But we want to only
    // return true for manifest constants, not those that depend on
    // constants with unknowable values, e.g. GlobalValue or BlockAddress.
    if (Operands[0]->isManifestConstant())
      return ConstantInt::getTrue(Ty->getContext());
    return nullptr;
  }

  if (isa<PoisonValue>(Operands[0])) {
    // TODO: All of these operations should probably propagate poison.
    if (IntrinsicID == Intrinsic::canonicalize)
      return PoisonValue::get(Ty);
  }

  if (isa<UndefValue>(Operands[0])) {
    // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN.
    // ctpop() is between 0 and bitwidth, pick 0 for undef.
    // fptoui.sat and fptosi.sat can always fold to zero (for a zero input).
    if (IntrinsicID == Intrinsic::cos ||
        IntrinsicID == Intrinsic::ctpop ||
        IntrinsicID == Intrinsic::fptoui_sat ||
        IntrinsicID == Intrinsic::fptosi_sat ||
        IntrinsicID == Intrinsic::canonicalize)
      return Constant::getNullValue(Ty);
    if (IntrinsicID == Intrinsic::bswap ||
        IntrinsicID == Intrinsic::bitreverse ||
        IntrinsicID == Intrinsic::launder_invariant_group ||
        IntrinsicID == Intrinsic::strip_invariant_group)
      return Operands[0];
  }

  if (isa<ConstantPointerNull>(Operands[0])) {
    // launder(null) == null == strip(null) iff in addrspace 0
    if (IntrinsicID == Intrinsic::launder_invariant_group ||
        IntrinsicID == Intrinsic::strip_invariant_group) {
      // If instruction is not yet put in a basic block (e.g. when cloning
      // a function during inlining), Call's caller may not be available.
      // So check Call's BB first before querying Call->getCaller.
      const Function *Caller =
          Call->getParent() ? Call->getCaller() : nullptr;
      if (Caller &&
          !NullPointerIsDefined(
              Caller, Operands[0]->getType()->getPointerAddressSpace())) {
        return Operands[0];
      }
      return nullptr;
    }
  }
  if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) {
    if (IntrinsicID == Intrinsic::convert_to_fp16) {
      APFloat Val(Op->getValueAPF());

      bool lost = false;
      Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost);

      return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt());
    }

    APFloat U = Op->getValueAPF();

    if (IntrinsicID == Intrinsic::wasm_trunc_signed ||
        IntrinsicID == Intrinsic::wasm_trunc_unsigned) {
      bool Signed = IntrinsicID == Intrinsic::wasm_trunc_signed;

      if (U.isNaN())
        return nullptr;

      unsigned Width = Ty->getIntegerBitWidth();
      APSInt Int(Width, !Signed);
      bool IsExact = false;
      APFloat::opStatus Status =
          U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact);

      if (Status == APFloat::opOK || Status == APFloat::opInexact)
        return ConstantInt::get(Ty, Int);

      return nullptr;
    }

    if (IntrinsicID == Intrinsic::fptoui_sat ||
        IntrinsicID == Intrinsic::fptosi_sat) {
      // convertToInteger() already has the desired saturation semantics.
      APSInt Int(Ty->getIntegerBitWidth(),
                 IntrinsicID == Intrinsic::fptoui_sat);
      bool IsExact;
      U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact);
      return ConstantInt::get(Ty, Int);
    }

    if (IntrinsicID == Intrinsic::canonicalize)
      return constantFoldCanonicalize(Ty, Call, U);

#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
    if (Ty->isFP128Ty()) {
      if (IntrinsicID == Intrinsic::log) {
        float128 Result = logf128(Op->getValueAPF().convertToQuad());
        return GetConstantFoldFPValue128(Result, Ty);
      }

      LibFunc Fp128Func = NotLibFunc;
      if (TLI->getLibFunc(Name, Fp128Func) && TLI->has(Fp128Func) &&
          Fp128Func == LibFunc_logl)
        return ConstantFoldFP128(logf128, Op->getValueAPF(), Ty);
    }
#endif
    if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
      return nullptr;

    // Use internal versions of these intrinsics.

    if (IntrinsicID == Intrinsic::nearbyint || IntrinsicID == Intrinsic::rint) {
      U.roundToIntegral(APFloat::rmNearestTiesToEven);
      return ConstantFP::get(Ty->getContext(), U);
    }

    if (IntrinsicID == Intrinsic::round) {
      U.roundToIntegral(APFloat::rmNearestTiesToAway);
      return ConstantFP::get(Ty->getContext(), U);
    }

    if (IntrinsicID == Intrinsic::roundeven) {
      U.roundToIntegral(APFloat::rmNearestTiesToEven);
      return ConstantFP::get(Ty->getContext(), U);
    }

    if (IntrinsicID == Intrinsic::ceil) {
      U.roundToIntegral(APFloat::rmTowardPositive);
      return ConstantFP::get(Ty->getContext(), U);
    }

    if (IntrinsicID == Intrinsic::floor) {
      U.roundToIntegral(APFloat::rmTowardNegative);
      return ConstantFP::get(Ty->getContext(), U);
    }

    if (IntrinsicID == Intrinsic::trunc) {
      U.roundToIntegral(APFloat::rmTowardZero);
      return ConstantFP::get(Ty->getContext(), U);
    }

    if (IntrinsicID == Intrinsic::fabs) {
      U.clearSign();
      return ConstantFP::get(Ty->getContext(), U);
    }

    if (IntrinsicID == Intrinsic::amdgcn_fract) {
      // The v_fract instruction behaves like the OpenCL spec, which defines
      // fract(x) as fmin(x - floor(x), 0x1.fffffep-1f): "The min() operator is
      // there to prevent fract(-small) from returning 1.0. It returns the
      // largest positive floating-point number less than 1.0."
      APFloat FloorU(U);
      FloorU.roundToIntegral(APFloat::rmTowardNegative);
      APFloat FractU(U - FloorU);
      APFloat AlmostOne(U.getSemantics(), 1);
      AlmostOne.next(/*nextDown*/ true);
      return ConstantFP::get(Ty->getContext(), minimum(FractU, AlmostOne));
    }
    // Rounding operations (floor, trunc, ceil, round and nearbyint) do not
    // raise FP exceptions, unless the argument is a signaling NaN.

    std::optional<APFloat::roundingMode> RM;
    switch (IntrinsicID) {
    default:
      break;
    case Intrinsic::experimental_constrained_nearbyint:
    case Intrinsic::experimental_constrained_rint: {
      auto CI = cast<ConstrainedFPIntrinsic>(Call);
      RM = CI->getRoundingMode();
      if (!RM || *RM == RoundingMode::Dynamic)
        return nullptr;
      break;
    }
    case Intrinsic::experimental_constrained_round:
      RM = APFloat::rmNearestTiesToAway;
      break;
    case Intrinsic::experimental_constrained_ceil:
      RM = APFloat::rmTowardPositive;
      break;
    case Intrinsic::experimental_constrained_floor:
      RM = APFloat::rmTowardNegative;
      break;
    case Intrinsic::experimental_constrained_trunc:
      RM = APFloat::rmTowardZero;
      break;
    }
    if (RM) {
      auto CI = cast<ConstrainedFPIntrinsic>(Call);
      if (U.isFinite()) {
        APFloat::opStatus St = U.roundToIntegral(*RM);
        if (IntrinsicID == Intrinsic::experimental_constrained_rint &&
            St == APFloat::opInexact) {
          std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
          if (EB && *EB == fp::ebStrict)
            return nullptr;
        }
      } else if (U.isSignaling()) {
        std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
        if (EB && *EB != fp::ebIgnore)
          return nullptr;
        U = APFloat::getQNaN(U.getSemantics());
      }
      return ConstantFP::get(Ty->getContext(), U);
    }
    /// We only fold functions with finite arguments. Folding NaN and inf is
    /// likely to be aborted with an exception anyway, and some host libms
    /// have known errors raising exceptions.
    if (!U.isFinite())
      return nullptr;

    /// Currently APFloat versions of these functions do not exist, so we use
    /// the host native double versions. Float versions are not called
    /// directly but for all these it is true (float)(f((double)arg)) ==
    /// f(arg). Long double not supported yet.
    const APFloat &APF = Op->getValueAPF();

    switch (IntrinsicID) {
    default: break;
    case Intrinsic::log:
      return ConstantFoldFP(log, APF, Ty);
    case Intrinsic::log2:
      // TODO: What about hosts that lack a C99 library?
      return ConstantFoldFP(log2, APF, Ty);
    case Intrinsic::log10:
      // TODO: What about hosts that lack a C99 library?
      return ConstantFoldFP(log10, APF, Ty);
    case Intrinsic::exp:
      return ConstantFoldFP(exp, APF, Ty);
    case Intrinsic::exp2:
      // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
      return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty);
    case Intrinsic::exp10:
      // Fold exp10(x) as pow(10, x), in case the host lacks a C99 library.
      return ConstantFoldBinaryFP(pow, APFloat(10.0), APF, Ty);
    case Intrinsic::sin:
      return ConstantFoldFP(sin, APF, Ty);
    case Intrinsic::cos:
      return ConstantFoldFP(cos, APF, Ty);
    case Intrinsic::sqrt:
      return ConstantFoldFP(sqrt, APF, Ty);
    case Intrinsic::amdgcn_cos:
    case Intrinsic::amdgcn_sin: {
      double V = getValueAsDouble(Op);
      if (V < -256.0 || V > 256.0)
        // The gfx8 and gfx9 architectures handle arguments outside the range
        // [-256, 256] differently. This should be a rare case so bail out
        // rather than trying to handle the difference.
        return nullptr;
      bool IsCos = IntrinsicID == Intrinsic::amdgcn_cos;
      double V4 = V * 4.0;
      if (V4 == floor(V4)) {
        // Force exact results for quarter-integer inputs.
        const double SinVals[4] = { 0.0, 1.0, 0.0, -1.0 };
        V = SinVals[((int)V4 + (IsCos ? 1 : 0)) & 3];
      } else {
        if (IsCos)
          V = cos(V * 2.0 * numbers::pi);
        else
          V = sin(V * 2.0 * numbers::pi);
      }
      return GetConstantFoldFPValue(V, Ty);
    }
    }
    if (!TLI)
      return nullptr;

    LibFunc Func = NotLibFunc;
    if (!TLI->getLibFunc(Name, Func))
      return nullptr;
2300switch (Func) {2301default:2302break;2303case LibFunc_acos:2304case LibFunc_acosf:2305case LibFunc_acos_finite:2306case LibFunc_acosf_finite:2307if (TLI->has(Func))2308return ConstantFoldFP(acos, APF, Ty);2309break;2310case LibFunc_asin:2311case LibFunc_asinf:2312case LibFunc_asin_finite:2313case LibFunc_asinf_finite:2314if (TLI->has(Func))2315return ConstantFoldFP(asin, APF, Ty);2316break;2317case LibFunc_atan:2318case LibFunc_atanf:2319if (TLI->has(Func))2320return ConstantFoldFP(atan, APF, Ty);2321break;2322case LibFunc_ceil:2323case LibFunc_ceilf:2324if (TLI->has(Func)) {2325U.roundToIntegral(APFloat::rmTowardPositive);2326return ConstantFP::get(Ty->getContext(), U);2327}2328break;2329case LibFunc_cos:2330case LibFunc_cosf:2331if (TLI->has(Func))2332return ConstantFoldFP(cos, APF, Ty);2333break;2334case LibFunc_cosh:2335case LibFunc_coshf:2336case LibFunc_cosh_finite:2337case LibFunc_coshf_finite:2338if (TLI->has(Func))2339return ConstantFoldFP(cosh, APF, Ty);2340break;2341case LibFunc_exp:2342case LibFunc_expf:2343case LibFunc_exp_finite:2344case LibFunc_expf_finite:2345if (TLI->has(Func))2346return ConstantFoldFP(exp, APF, Ty);2347break;2348case LibFunc_exp2:2349case LibFunc_exp2f:2350case LibFunc_exp2_finite:2351case LibFunc_exp2f_finite:2352if (TLI->has(Func))2353// Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.2354return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty);2355break;2356case LibFunc_fabs:2357case LibFunc_fabsf:2358if (TLI->has(Func)) {2359U.clearSign();2360return ConstantFP::get(Ty->getContext(), U);2361}2362break;2363case LibFunc_floor:2364case LibFunc_floorf:2365if (TLI->has(Func)) {2366U.roundToIntegral(APFloat::rmTowardNegative);2367return ConstantFP::get(Ty->getContext(), U);2368}2369break;2370case LibFunc_log:2371case LibFunc_logf:2372case LibFunc_log_finite:2373case LibFunc_logf_finite:2374if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))2375return ConstantFoldFP(log, APF, Ty);2376break;2377case LibFunc_log2:2378case LibFunc_log2f:2379case LibFunc_log2_finite:2380case LibFunc_log2f_finite:2381if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))2382// TODO: What about hosts that lack a C99 library?2383return ConstantFoldFP(log2, APF, Ty);2384break;2385case LibFunc_log10:2386case LibFunc_log10f:2387case LibFunc_log10_finite:2388case LibFunc_log10f_finite:2389if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))2390// TODO: What about hosts that lack a C99 library?2391return ConstantFoldFP(log10, APF, Ty);2392break;2393case LibFunc_logl:2394return nullptr;2395case LibFunc_nearbyint:2396case LibFunc_nearbyintf:2397case LibFunc_rint:2398case LibFunc_rintf:2399if (TLI->has(Func)) {2400U.roundToIntegral(APFloat::rmNearestTiesToEven);2401return ConstantFP::get(Ty->getContext(), U);2402}2403break;2404case LibFunc_round:2405case LibFunc_roundf:2406if (TLI->has(Func)) {2407U.roundToIntegral(APFloat::rmNearestTiesToAway);2408return ConstantFP::get(Ty->getContext(), U);2409}2410break;2411case LibFunc_sin:2412case LibFunc_sinf:2413if (TLI->has(Func))2414return ConstantFoldFP(sin, APF, Ty);2415break;2416case LibFunc_sinh:2417case LibFunc_sinhf:2418case LibFunc_sinh_finite:2419case LibFunc_sinhf_finite:2420if (TLI->has(Func))2421return ConstantFoldFP(sinh, APF, Ty);2422break;2423case LibFunc_sqrt:2424case LibFunc_sqrtf:2425if (!APF.isNegative() && TLI->has(Func))2426return ConstantFoldFP(sqrt, APF, Ty);2427break;2428case LibFunc_tan:2429case LibFunc_tanf:2430if (TLI->has(Func))2431return ConstantFoldFP(tan, APF, Ty);2432break;2433case 
LibFunc_tanh:2434case LibFunc_tanhf:2435if (TLI->has(Func))2436return ConstantFoldFP(tanh, APF, Ty);2437break;2438case LibFunc_trunc:2439case LibFunc_truncf:2440if (TLI->has(Func)) {2441U.roundToIntegral(APFloat::rmTowardZero);2442return ConstantFP::get(Ty->getContext(), U);2443}2444break;2445}2446return nullptr;2447}2448
  if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
    switch (IntrinsicID) {
    case Intrinsic::bswap:
      return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap());
    case Intrinsic::ctpop:
      return ConstantInt::get(Ty, Op->getValue().popcount());
    case Intrinsic::bitreverse:
      return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits());
    case Intrinsic::convert_from_fp16: {
      APFloat Val(APFloat::IEEEhalf(), Op->getValue());

      bool lost = false;
      APFloat::opStatus status = Val.convert(
          Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost);

      // Conversion is always precise.
      (void)status;
      assert(status != APFloat::opInexact && !lost &&
             "Precision lost during fp16 constfolding");

      return ConstantFP::get(Ty->getContext(), Val);
    }

    case Intrinsic::amdgcn_s_wqm: {
      uint64_t Val = Op->getZExtValue();
      Val |= (Val & 0x5555555555555555ULL) << 1 |
             ((Val >> 1) & 0x5555555555555555ULL);
      Val |= (Val & 0x3333333333333333ULL) << 2 |
             ((Val >> 2) & 0x3333333333333333ULL);
      return ConstantInt::get(Ty, Val);
    }

    case Intrinsic::amdgcn_s_quadmask: {
      uint64_t Val = Op->getZExtValue();
      uint64_t QuadMask = 0;
      for (unsigned I = 0; I < Op->getBitWidth() / 4; ++I, Val >>= 4) {
        if (!(Val & 0xF))
          continue;

        QuadMask |= (1ULL << I);
      }
      return ConstantInt::get(Ty, QuadMask);
    }

    case Intrinsic::amdgcn_s_bitreplicate: {
      uint64_t Val = Op->getZExtValue();
      Val = (Val & 0x000000000000FFFFULL) | (Val & 0x00000000FFFF0000ULL) << 16;
      Val = (Val & 0x000000FF000000FFULL) | (Val & 0x0000FF000000FF00ULL) << 8;
      Val = (Val & 0x000F000F000F000FULL) | (Val & 0x00F000F000F000F0ULL) << 4;
      Val = (Val & 0x0303030303030303ULL) | (Val & 0x0C0C0C0C0C0C0C0CULL) << 2;
      Val = (Val & 0x1111111111111111ULL) | (Val & 0x2222222222222222ULL) << 1;
      Val = Val | Val << 1;
      return ConstantInt::get(Ty, Val);
    }
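
    // For example, s_quadmask sets result bit I when any bit of input nibble
    // I is set, so an input of 0x00F0 yields 0b10; s_bitreplicate doubles each
    // input bit into bits 2*I and 2*I+1 of the result, so 0b01 becomes 0b0011.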

    default:
      return nullptr;
    }
  }

  switch (IntrinsicID) {
  default: break;
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:
    if (Constant *C = constantFoldVectorReduce(IntrinsicID, Operands[0]))
      return C;
    break;
  }
2525// Support ConstantVector in case we have an Undef in the top.2526if (isa<ConstantVector>(Operands[0]) ||2527isa<ConstantDataVector>(Operands[0])) {2528auto *Op = cast<Constant>(Operands[0]);2529switch (IntrinsicID) {2530default: break;2531case Intrinsic::x86_sse_cvtss2si:2532case Intrinsic::x86_sse_cvtss2si64:2533case Intrinsic::x86_sse2_cvtsd2si:2534case Intrinsic::x86_sse2_cvtsd2si64:2535if (ConstantFP *FPOp =2536dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))2537return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),2538/*roundTowardZero=*/false, Ty,2539/*IsSigned*/true);2540break;2541case Intrinsic::x86_sse_cvttss2si:2542case Intrinsic::x86_sse_cvttss2si64:2543case Intrinsic::x86_sse2_cvttsd2si:2544case Intrinsic::x86_sse2_cvttsd2si64:2545if (ConstantFP *FPOp =2546dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))2547return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),2548/*roundTowardZero=*/true, Ty,2549/*IsSigned*/true);2550break;2551}2552}2553
2554return nullptr;2555}

static Constant *evaluateCompare(const APFloat &Op1, const APFloat &Op2,
                                 const ConstrainedFPIntrinsic *Call) {
  APFloat::opStatus St = APFloat::opOK;
  auto *FCmp = cast<ConstrainedFPCmpIntrinsic>(Call);
  FCmpInst::Predicate Cond = FCmp->getPredicate();
  if (FCmp->isSignaling()) {
    if (Op1.isNaN() || Op2.isNaN())
      St = APFloat::opInvalidOp;
  } else {
    if (Op1.isSignaling() || Op2.isSignaling())
      St = APFloat::opInvalidOp;
  }
  bool Result = FCmpInst::compare(Op1, Op2, Cond);
  if (mayFoldConstrained(const_cast<ConstrainedFPCmpIntrinsic *>(FCmp), St))
    return ConstantInt::get(Call->getType()->getScalarType(), Result);
  return nullptr;
}

static Constant *ConstantFoldLibCall2(StringRef Name, Type *Ty,
                                      ArrayRef<Constant *> Operands,
                                      const TargetLibraryInfo *TLI) {
  if (!TLI)
    return nullptr;

  LibFunc Func = NotLibFunc;
  if (!TLI->getLibFunc(Name, Func))
    return nullptr;

  const auto *Op1 = dyn_cast<ConstantFP>(Operands[0]);
  if (!Op1)
    return nullptr;

  const auto *Op2 = dyn_cast<ConstantFP>(Operands[1]);
  if (!Op2)
    return nullptr;

  const APFloat &Op1V = Op1->getValueAPF();
  const APFloat &Op2V = Op2->getValueAPF();

  switch (Func) {
  default:
    break;
  case LibFunc_pow:
  case LibFunc_powf:
  case LibFunc_pow_finite:
  case LibFunc_powf_finite:
    if (TLI->has(Func))
      return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
    break;
  case LibFunc_fmod:
  case LibFunc_fmodf:
    if (TLI->has(Func)) {
      APFloat V = Op1->getValueAPF();
      if (APFloat::opStatus::opOK == V.mod(Op2->getValueAPF()))
        return ConstantFP::get(Ty->getContext(), V);
    }
    break;
  case LibFunc_remainder:
  case LibFunc_remainderf:
    if (TLI->has(Func)) {
      APFloat V = Op1->getValueAPF();
      if (APFloat::opStatus::opOK == V.remainder(Op2->getValueAPF()))
        return ConstantFP::get(Ty->getContext(), V);
    }
    break;
  case LibFunc_atan2:
  case LibFunc_atan2f:
    // atan2(+/-0.0, +/-0.0) is known to raise an exception on some libm
    // implementations (Solaris), so we do not assume a known result for that.
    if (Op1V.isZero() && Op2V.isZero())
      return nullptr;
    [[fallthrough]];
  case LibFunc_atan2_finite:
  case LibFunc_atan2f_finite:
    if (TLI->has(Func))
      return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
    break;
  }

  return nullptr;
}
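
// For example, with a TargetLibraryInfo that reports pow as available,
// pow(2.0, 10.0) folds to 1024.0 via the host libm, and fmod(5.0, 3.0) folds
// to 2.0 through APFloat::mod without touching the host library at all.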

static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
                                            ArrayRef<Constant *> Operands,
                                            const CallBase *Call) {
  assert(Operands.size() == 2 && "Wrong number of operands.");

  if (Ty->isFloatingPointTy()) {
    // TODO: We should have undef handling for all of the FP intrinsics that
    //       are attempted to be folded in this function.
    bool IsOp0Undef = isa<UndefValue>(Operands[0]);
    bool IsOp1Undef = isa<UndefValue>(Operands[1]);
    switch (IntrinsicID) {
    case Intrinsic::maxnum:
    case Intrinsic::minnum:
    case Intrinsic::maximum:
    case Intrinsic::minimum:
      // If one argument is undef, return the other argument.
      if (IsOp0Undef)
        return Operands[1];
      if (IsOp1Undef)
        return Operands[0];
      break;
    }
  }

  if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
    const APFloat &Op1V = Op1->getValueAPF();

    if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
      if (Op2->getType() != Op1->getType())
        return nullptr;
      const APFloat &Op2V = Op2->getValueAPF();

      if (const auto *ConstrIntr =
              dyn_cast_if_present<ConstrainedFPIntrinsic>(Call)) {
        RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
        APFloat Res = Op1V;
        APFloat::opStatus St;
        switch (IntrinsicID) {
        default:
          return nullptr;
        case Intrinsic::experimental_constrained_fadd:
          St = Res.add(Op2V, RM);
          break;
        case Intrinsic::experimental_constrained_fsub:
          St = Res.subtract(Op2V, RM);
          break;
        case Intrinsic::experimental_constrained_fmul:
          St = Res.multiply(Op2V, RM);
          break;
        case Intrinsic::experimental_constrained_fdiv:
          St = Res.divide(Op2V, RM);
          break;
        case Intrinsic::experimental_constrained_frem:
          St = Res.mod(Op2V);
          break;
        case Intrinsic::experimental_constrained_fcmp:
        case Intrinsic::experimental_constrained_fcmps:
          return evaluateCompare(Op1V, Op2V, ConstrIntr);
        }
        if (mayFoldConstrained(const_cast<ConstrainedFPIntrinsic *>(ConstrIntr),
                               St))
          return ConstantFP::get(Ty->getContext(), Res);
        return nullptr;
      }

      switch (IntrinsicID) {
      default:
        break;
      case Intrinsic::copysign:
        return ConstantFP::get(Ty->getContext(), APFloat::copySign(Op1V, Op2V));
      case Intrinsic::minnum:
        return ConstantFP::get(Ty->getContext(), minnum(Op1V, Op2V));
      case Intrinsic::maxnum:
        return ConstantFP::get(Ty->getContext(), maxnum(Op1V, Op2V));
      case Intrinsic::minimum:
        return ConstantFP::get(Ty->getContext(), minimum(Op1V, Op2V));
      case Intrinsic::maximum:
        return ConstantFP::get(Ty->getContext(), maximum(Op1V, Op2V));
      }

      if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
        return nullptr;

      switch (IntrinsicID) {
      default:
        break;
      case Intrinsic::pow:
        return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
      case Intrinsic::amdgcn_fmul_legacy:
        // The legacy behaviour is that multiplying +/- 0.0 by anything, even
        // NaN or infinity, gives +0.0.
        if (Op1V.isZero() || Op2V.isZero())
          return ConstantFP::getZero(Ty);
        return ConstantFP::get(Ty->getContext(), Op1V * Op2V);
      }

    } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
      switch (IntrinsicID) {
      case Intrinsic::ldexp: {
        return ConstantFP::get(
            Ty->getContext(),
            scalbn(Op1V, Op2C->getSExtValue(), APFloat::rmNearestTiesToEven));
      }
      case Intrinsic::is_fpclass: {
        FPClassTest Mask = static_cast<FPClassTest>(Op2C->getZExtValue());
        bool Result =
            ((Mask & fcSNan) && Op1V.isNaN() && Op1V.isSignaling()) ||
            ((Mask & fcQNan) && Op1V.isNaN() && !Op1V.isSignaling()) ||
            ((Mask & fcNegInf) && Op1V.isNegInfinity()) ||
            ((Mask & fcNegNormal) && Op1V.isNormal() && Op1V.isNegative()) ||
            ((Mask & fcNegSubnormal) && Op1V.isDenormal() && Op1V.isNegative()) ||
            ((Mask & fcNegZero) && Op1V.isZero() && Op1V.isNegative()) ||
            ((Mask & fcPosZero) && Op1V.isZero() && !Op1V.isNegative()) ||
            ((Mask & fcPosSubnormal) && Op1V.isDenormal() && !Op1V.isNegative()) ||
            ((Mask & fcPosNormal) && Op1V.isNormal() && !Op1V.isNegative()) ||
            ((Mask & fcPosInf) && Op1V.isPosInfinity());
        return ConstantInt::get(Ty, Result);
      }
      default:
        break;
      }

      if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
        return nullptr;
      if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
        return ConstantFP::get(
            Ty->getContext(),
            APFloat((float)std::pow((float)Op1V.convertToDouble(),
                                    (int)Op2C->getZExtValue())));
      if (IntrinsicID == Intrinsic::powi && Ty->isFloatTy())
        return ConstantFP::get(
            Ty->getContext(),
            APFloat((float)std::pow((float)Op1V.convertToDouble(),
                                    (int)Op2C->getZExtValue())));
      if (IntrinsicID == Intrinsic::powi && Ty->isDoubleTy())
        return ConstantFP::get(
            Ty->getContext(),
            APFloat((double)std::pow(Op1V.convertToDouble(),
                                     (int)Op2C->getZExtValue())));
    }
    return nullptr;
  }
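
  // A few concrete cases handled above: copysign(1.0, -2.0) folds to -1.0,
  // powi(2.0, 10) folds to 1024.0 via std::pow on the host, and
  // is_fpclass(-0.0, fcNegZero) folds to i1 true.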

  if (Operands[0]->getType()->isIntegerTy() &&
      Operands[1]->getType()->isIntegerTy()) {
    const APInt *C0, *C1;
    if (!getConstIntOrUndef(Operands[0], C0) ||
        !getConstIntOrUndef(Operands[1], C1))
      return nullptr;

    switch (IntrinsicID) {
    default: break;
    case Intrinsic::smax:
    case Intrinsic::smin:
    case Intrinsic::umax:
    case Intrinsic::umin:
      // This is the same as for binary ops - poison propagates.
      // TODO: Poison handling should be consolidated.
      if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
        return PoisonValue::get(Ty);

      if (!C0 && !C1)
        return UndefValue::get(Ty);
      if (!C0 || !C1)
        return MinMaxIntrinsic::getSaturationPoint(IntrinsicID, Ty);
      return ConstantInt::get(
          Ty, ICmpInst::compare(*C0, *C1,
                                MinMaxIntrinsic::getPredicate(IntrinsicID))
                  ? *C0
                  : *C1);

    case Intrinsic::scmp:
    case Intrinsic::ucmp:
      if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
        return PoisonValue::get(Ty);

      if (!C0 || !C1)
        return ConstantInt::get(Ty, 0);

      int Res;
      if (IntrinsicID == Intrinsic::scmp)
        Res = C0->sgt(*C1) ? 1 : C0->slt(*C1) ? -1 : 0;
      else
        Res = C0->ugt(*C1) ? 1 : C0->ult(*C1) ? -1 : 0;
      return ConstantInt::get(Ty, Res, /*IsSigned=*/true);

    case Intrinsic::usub_with_overflow:
    case Intrinsic::ssub_with_overflow:
      // X - undef -> { 0, false }
      // undef - X -> { 0, false }
      if (!C0 || !C1)
        return Constant::getNullValue(Ty);
      [[fallthrough]];
    case Intrinsic::uadd_with_overflow:
    case Intrinsic::sadd_with_overflow:
      // X + undef -> { -1, false }
      // undef + x -> { -1, false }
      if (!C0 || !C1) {
        return ConstantStruct::get(
            cast<StructType>(Ty),
            {Constant::getAllOnesValue(Ty->getStructElementType(0)),
             Constant::getNullValue(Ty->getStructElementType(1))});
      }
      [[fallthrough]];
    case Intrinsic::smul_with_overflow:
    case Intrinsic::umul_with_overflow: {
      // undef * X -> { 0, false }
      // X * undef -> { 0, false }
      if (!C0 || !C1)
        return Constant::getNullValue(Ty);

      APInt Res;
      bool Overflow;
      switch (IntrinsicID) {
      default: llvm_unreachable("Invalid case");
      case Intrinsic::sadd_with_overflow:
        Res = C0->sadd_ov(*C1, Overflow);
        break;
      case Intrinsic::uadd_with_overflow:
        Res = C0->uadd_ov(*C1, Overflow);
        break;
      case Intrinsic::ssub_with_overflow:
        Res = C0->ssub_ov(*C1, Overflow);
        break;
      case Intrinsic::usub_with_overflow:
        Res = C0->usub_ov(*C1, Overflow);
        break;
      case Intrinsic::smul_with_overflow:
        Res = C0->smul_ov(*C1, Overflow);
        break;
      case Intrinsic::umul_with_overflow:
        Res = C0->umul_ov(*C1, Overflow);
        break;
      }
      Constant *Ops[] = {
        ConstantInt::get(Ty->getContext(), Res),
        ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
      };
      return ConstantStruct::get(cast<StructType>(Ty), Ops);
    }
    case Intrinsic::uadd_sat:
    case Intrinsic::sadd_sat:
      // This is the same as for binary ops - poison propagates.
      // TODO: Poison handling should be consolidated.
      if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
        return PoisonValue::get(Ty);

      if (!C0 && !C1)
        return UndefValue::get(Ty);
      if (!C0 || !C1)
        return Constant::getAllOnesValue(Ty);
      if (IntrinsicID == Intrinsic::uadd_sat)
        return ConstantInt::get(Ty, C0->uadd_sat(*C1));
      else
        return ConstantInt::get(Ty, C0->sadd_sat(*C1));
    case Intrinsic::usub_sat:
    case Intrinsic::ssub_sat:
      // This is the same as for binary ops - poison propagates.
      // TODO: Poison handling should be consolidated.
      if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
        return PoisonValue::get(Ty);

      if (!C0 && !C1)
        return UndefValue::get(Ty);
      if (!C0 || !C1)
        return Constant::getNullValue(Ty);
      if (IntrinsicID == Intrinsic::usub_sat)
        return ConstantInt::get(Ty, C0->usub_sat(*C1));
      else
        return ConstantInt::get(Ty, C0->ssub_sat(*C1));
    case Intrinsic::cttz:
    case Intrinsic::ctlz:
      assert(C1 && "Must be constant int");

      // cttz(0, 1) and ctlz(0, 1) are poison.
      if (C1->isOne() && (!C0 || C0->isZero()))
        return PoisonValue::get(Ty);
      if (!C0)
        return Constant::getNullValue(Ty);
      if (IntrinsicID == Intrinsic::cttz)
        return ConstantInt::get(Ty, C0->countr_zero());
      else
        return ConstantInt::get(Ty, C0->countl_zero());

    case Intrinsic::abs:
      assert(C1 && "Must be constant int");
      assert((C1->isOne() || C1->isZero()) && "Must be 0 or 1");

      // Undef or minimum val operand with poison min --> undef
      if (C1->isOne() && (!C0 || C0->isMinSignedValue()))
        return UndefValue::get(Ty);

      // Undef operand with no poison min --> 0 (sign bit must be clear)
      if (!C0)
        return Constant::getNullValue(Ty);

      return ConstantInt::get(Ty, C0->abs());
    case Intrinsic::amdgcn_wave_reduce_umin:
    case Intrinsic::amdgcn_wave_reduce_umax:
      return dyn_cast<Constant>(Operands[0]);
    }

    return nullptr;
  }
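
  // Worked examples for the integer cases above: umin(i8 7, i8 200) folds to
  // 7, uadd.sat(i8 200, i8 100) saturates to 255, cttz(i32 8, i1 0) folds to
  // 3, and uadd.with.overflow(i8 255, i8 1) folds to { i8 0, i1 true }.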
2944// Support ConstantVector in case we have an Undef in the top.2945if ((isa<ConstantVector>(Operands[0]) ||2946isa<ConstantDataVector>(Operands[0])) &&2947// Check for default rounding mode.2948// FIXME: Support other rounding modes?2949isa<ConstantInt>(Operands[1]) &&2950cast<ConstantInt>(Operands[1])->getValue() == 4) {2951auto *Op = cast<Constant>(Operands[0]);2952switch (IntrinsicID) {2953default: break;2954case Intrinsic::x86_avx512_vcvtss2si32:2955case Intrinsic::x86_avx512_vcvtss2si64:2956case Intrinsic::x86_avx512_vcvtsd2si32:2957case Intrinsic::x86_avx512_vcvtsd2si64:2958if (ConstantFP *FPOp =2959dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))2960return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),2961/*roundTowardZero=*/false, Ty,2962/*IsSigned*/true);2963break;2964case Intrinsic::x86_avx512_vcvtss2usi32:2965case Intrinsic::x86_avx512_vcvtss2usi64:2966case Intrinsic::x86_avx512_vcvtsd2usi32:2967case Intrinsic::x86_avx512_vcvtsd2usi64:2968if (ConstantFP *FPOp =2969dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))2970return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),2971/*roundTowardZero=*/false, Ty,2972/*IsSigned*/false);2973break;2974case Intrinsic::x86_avx512_cvttss2si:2975case Intrinsic::x86_avx512_cvttss2si64:2976case Intrinsic::x86_avx512_cvttsd2si:2977case Intrinsic::x86_avx512_cvttsd2si64:2978if (ConstantFP *FPOp =2979dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))2980return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),2981/*roundTowardZero=*/true, Ty,2982/*IsSigned*/true);2983break;2984case Intrinsic::x86_avx512_cvttss2usi:2985case Intrinsic::x86_avx512_cvttss2usi64:2986case Intrinsic::x86_avx512_cvttsd2usi:2987case Intrinsic::x86_avx512_cvttsd2usi64:2988if (ConstantFP *FPOp =2989dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))2990return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),2991/*roundTowardZero=*/true, Ty,2992/*IsSigned*/false);2993break;2994}2995}2996return nullptr;2997}

static APFloat ConstantFoldAMDGCNCubeIntrinsic(Intrinsic::ID IntrinsicID,
                                               const APFloat &S0,
                                               const APFloat &S1,
                                               const APFloat &S2) {
  unsigned ID;
  const fltSemantics &Sem = S0.getSemantics();
  APFloat MA(Sem), SC(Sem), TC(Sem);
  if (abs(S2) >= abs(S0) && abs(S2) >= abs(S1)) {
    if (S2.isNegative() && S2.isNonZero() && !S2.isNaN()) {
      // S2 < 0
      ID = 5;
      SC = -S0;
    } else {
      ID = 4;
      SC = S0;
    }
    MA = S2;
    TC = -S1;
  } else if (abs(S1) >= abs(S0)) {
    if (S1.isNegative() && S1.isNonZero() && !S1.isNaN()) {
      // S1 < 0
      ID = 3;
      TC = -S2;
    } else {
      ID = 2;
      TC = S2;
    }
    MA = S1;
    SC = S0;
  } else {
    if (S0.isNegative() && S0.isNonZero() && !S0.isNaN()) {
      // S0 < 0
      ID = 1;
      SC = S2;
    } else {
      ID = 0;
      SC = -S2;
    }
    MA = S0;
    TC = -S1;
  }
  switch (IntrinsicID) {
  default:
    llvm_unreachable("unhandled amdgcn cube intrinsic");
  case Intrinsic::amdgcn_cubeid:
    return APFloat(Sem, ID);
  case Intrinsic::amdgcn_cubema:
    return MA + MA;
  case Intrinsic::amdgcn_cubesc:
    return SC;
  case Intrinsic::amdgcn_cubetc:
    return TC;
  }
}

static Constant *ConstantFoldAMDGCNPermIntrinsic(ArrayRef<Constant *> Operands,
                                                 Type *Ty) {
  const APInt *C0, *C1, *C2;
  if (!getConstIntOrUndef(Operands[0], C0) ||
      !getConstIntOrUndef(Operands[1], C1) ||
      !getConstIntOrUndef(Operands[2], C2))
    return nullptr;

  if (!C2)
    return UndefValue::get(Ty);

  APInt Val(32, 0);
  unsigned NumUndefBytes = 0;
  for (unsigned I = 0; I < 32; I += 8) {
    unsigned Sel = C2->extractBitsAsZExtValue(8, I);
    unsigned B = 0;

    if (Sel >= 13)
      B = 0xff;
    else if (Sel == 12)
      B = 0x00;
    else {
      const APInt *Src = ((Sel & 10) == 10 || (Sel & 12) == 4) ? C0 : C1;
      if (!Src)
        ++NumUndefBytes;
      else if (Sel < 8)
        B = Src->extractBitsAsZExtValue(8, (Sel & 3) * 8);
      else
        B = Src->extractBitsAsZExtValue(1, (Sel & 1) ? 31 : 15) * 0xff;
    }

    Val.insertBits(B, I, 8);
  }

  if (NumUndefBytes == 4)
    return UndefValue::get(Ty);

  return ConstantInt::get(Ty, Val);
}
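
// For example, a selector byte of 12 always produces a 0x00 result byte and
// any selector byte >= 13 produces 0xff, independent of the other two
// operands, as implemented above.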

static Constant *ConstantFoldScalarCall3(StringRef Name,
                                         Intrinsic::ID IntrinsicID,
                                         Type *Ty,
                                         ArrayRef<Constant *> Operands,
                                         const TargetLibraryInfo *TLI,
                                         const CallBase *Call) {
  assert(Operands.size() == 3 && "Wrong number of operands.");

  if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
    if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
      if (const auto *Op3 = dyn_cast<ConstantFP>(Operands[2])) {
        const APFloat &C1 = Op1->getValueAPF();
        const APFloat &C2 = Op2->getValueAPF();
        const APFloat &C3 = Op3->getValueAPF();

        if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
          RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
          APFloat Res = C1;
          APFloat::opStatus St;
          switch (IntrinsicID) {
          default:
            return nullptr;
          case Intrinsic::experimental_constrained_fma:
          case Intrinsic::experimental_constrained_fmuladd:
            St = Res.fusedMultiplyAdd(C2, C3, RM);
            break;
          }
          if (mayFoldConstrained(
                  const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), St))
            return ConstantFP::get(Ty->getContext(), Res);
          return nullptr;
        }

        switch (IntrinsicID) {
        default: break;
        case Intrinsic::amdgcn_fma_legacy: {
          // The legacy behaviour is that multiplying +/- 0.0 by anything, even
          // NaN or infinity, gives +0.0.
          if (C1.isZero() || C2.isZero()) {
            // It's tempting to just return C3 here, but that would give the
            // wrong result if C3 was -0.0.
            return ConstantFP::get(Ty->getContext(), APFloat(0.0f) + C3);
          }
          [[fallthrough]];
        }
        case Intrinsic::fma:
        case Intrinsic::fmuladd: {
          APFloat V = C1;
          V.fusedMultiplyAdd(C2, C3, APFloat::rmNearestTiesToEven);
          return ConstantFP::get(Ty->getContext(), V);
        }
        case Intrinsic::amdgcn_cubeid:
        case Intrinsic::amdgcn_cubema:
        case Intrinsic::amdgcn_cubesc:
        case Intrinsic::amdgcn_cubetc: {
          APFloat V = ConstantFoldAMDGCNCubeIntrinsic(IntrinsicID, C1, C2, C3);
          return ConstantFP::get(Ty->getContext(), V);
        }
        }
      }
    }
  }
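
  // For example, fma(2.0, 3.0, 1.0) and fmuladd(2.0, 3.0, 1.0) both fold to
  // 7.0 here, while amdgcn.fma.legacy(0.0, inf, -0.0) folds to +0.0 because
  // the zero-times-anything special case adds +0.0 to the addend first.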

  if (IntrinsicID == Intrinsic::smul_fix ||
      IntrinsicID == Intrinsic::smul_fix_sat) {
    // poison * C -> poison
    // C * poison -> poison
    if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
      return PoisonValue::get(Ty);

    const APInt *C0, *C1;
    if (!getConstIntOrUndef(Operands[0], C0) ||
        !getConstIntOrUndef(Operands[1], C1))
      return nullptr;

    // undef * C -> 0
    // C * undef -> 0
    if (!C0 || !C1)
      return Constant::getNullValue(Ty);

    // This code performs rounding towards negative infinity in case the result
    // cannot be represented exactly for the given scale. Targets that do care
    // about rounding should use a target hook for specifying how rounding
    // should be done, and provide their own folding to be consistent with
    // rounding. This is the same approach as used by
    // DAGTypeLegalizer::ExpandIntRes_MULFIX.
    unsigned Scale = cast<ConstantInt>(Operands[2])->getZExtValue();
    unsigned Width = C0->getBitWidth();
    assert(Scale < Width && "Illegal scale.");
    unsigned ExtendedWidth = Width * 2;
    APInt Product =
        (C0->sext(ExtendedWidth) * C1->sext(ExtendedWidth)).ashr(Scale);
    if (IntrinsicID == Intrinsic::smul_fix_sat) {
      APInt Max = APInt::getSignedMaxValue(Width).sext(ExtendedWidth);
      APInt Min = APInt::getSignedMinValue(Width).sext(ExtendedWidth);
      Product = APIntOps::smin(Product, Max);
      Product = APIntOps::smax(Product, Min);
    }
    return ConstantInt::get(Ty->getContext(), Product.sextOrTrunc(Width));
  }
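
  // Worked example: smul.fix(i32 6, i32 4, scale 1) treats the operands as
  // fixed-point values 3.0 and 2.0 (one fractional bit); the double-width
  // product 24 shifted right by the scale gives 12, which encodes 6.0.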

  if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
    const APInt *C0, *C1, *C2;
    if (!getConstIntOrUndef(Operands[0], C0) ||
        !getConstIntOrUndef(Operands[1], C1) ||
        !getConstIntOrUndef(Operands[2], C2))
      return nullptr;

    bool IsRight = IntrinsicID == Intrinsic::fshr;
    if (!C2)
      return Operands[IsRight ? 1 : 0];
    if (!C0 && !C1)
      return UndefValue::get(Ty);

    // The shift amount is interpreted as modulo the bitwidth. If the shift
    // amount is effectively 0, avoid UB due to oversized inverse shift below.
    unsigned BitWidth = C2->getBitWidth();
    unsigned ShAmt = C2->urem(BitWidth);
    if (!ShAmt)
      return Operands[IsRight ? 1 : 0];

    // (C0 << ShlAmt) | (C1 >> LshrAmt)
    unsigned LshrAmt = IsRight ? ShAmt : BitWidth - ShAmt;
    unsigned ShlAmt = !IsRight ? ShAmt : BitWidth - ShAmt;
    if (!C0)
      return ConstantInt::get(Ty, C1->lshr(LshrAmt));
    if (!C1)
      return ConstantInt::get(Ty, C0->shl(ShlAmt));
    return ConstantInt::get(Ty, C0->shl(ShlAmt) | C1->lshr(LshrAmt));
  }
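
  // For example, fshl(i8 0x01, i8 0x80, i8 1) folds to 0x03: the shift amount
  // 1 is taken modulo 8, C0 contributes 0x01 << 1 and C1 contributes
  // 0x80 >> 7.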

  if (IntrinsicID == Intrinsic::amdgcn_perm)
    return ConstantFoldAMDGCNPermIntrinsic(Operands, Ty);

  return nullptr;
}

static Constant *ConstantFoldScalarCall(StringRef Name,
                                        Intrinsic::ID IntrinsicID,
                                        Type *Ty,
                                        ArrayRef<Constant *> Operands,
                                        const TargetLibraryInfo *TLI,
                                        const CallBase *Call) {
  if (Operands.size() == 1)
    return ConstantFoldScalarCall1(Name, IntrinsicID, Ty, Operands, TLI, Call);

  if (Operands.size() == 2) {
    if (Constant *FoldedLibCall =
            ConstantFoldLibCall2(Name, Ty, Operands, TLI)) {
      return FoldedLibCall;
    }
    return ConstantFoldIntrinsicCall2(IntrinsicID, Ty, Operands, Call);
  }

  if (Operands.size() == 3)
    return ConstantFoldScalarCall3(Name, IntrinsicID, Ty, Operands, TLI, Call);

  return nullptr;
}

static Constant *ConstantFoldFixedVectorCall(
    StringRef Name, Intrinsic::ID IntrinsicID, FixedVectorType *FVTy,
    ArrayRef<Constant *> Operands, const DataLayout &DL,
    const TargetLibraryInfo *TLI, const CallBase *Call) {
  SmallVector<Constant *, 4> Result(FVTy->getNumElements());
  SmallVector<Constant *, 4> Lane(Operands.size());
  Type *Ty = FVTy->getElementType();

  switch (IntrinsicID) {
  case Intrinsic::masked_load: {
    auto *SrcPtr = Operands[0];
    auto *Mask = Operands[2];
    auto *Passthru = Operands[3];

    Constant *VecData = ConstantFoldLoadFromConstPtr(SrcPtr, FVTy, DL);

    SmallVector<Constant *, 32> NewElements;
    for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
      auto *MaskElt = Mask->getAggregateElement(I);
      if (!MaskElt)
        break;
      auto *PassthruElt = Passthru->getAggregateElement(I);
      auto *VecElt = VecData ? VecData->getAggregateElement(I) : nullptr;
      if (isa<UndefValue>(MaskElt)) {
        if (PassthruElt)
          NewElements.push_back(PassthruElt);
        else if (VecElt)
          NewElements.push_back(VecElt);
        else
          return nullptr;
      }
      if (MaskElt->isNullValue()) {
        if (!PassthruElt)
          return nullptr;
        NewElements.push_back(PassthruElt);
      } else if (MaskElt->isOneValue()) {
        if (!VecElt)
          return nullptr;
        NewElements.push_back(VecElt);
      } else {
        return nullptr;
      }
    }
    if (NewElements.size() != FVTy->getNumElements())
      return nullptr;
    return ConstantVector::get(NewElements);
  }
  case Intrinsic::arm_mve_vctp8:
  case Intrinsic::arm_mve_vctp16:
  case Intrinsic::arm_mve_vctp32:
  case Intrinsic::arm_mve_vctp64: {
    if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
      unsigned Lanes = FVTy->getNumElements();
      uint64_t Limit = Op->getZExtValue();

      SmallVector<Constant *, 16> NCs;
      for (unsigned i = 0; i < Lanes; i++) {
        if (i < Limit)
          NCs.push_back(ConstantInt::getTrue(Ty));
        else
          NCs.push_back(ConstantInt::getFalse(Ty));
      }
      return ConstantVector::get(NCs);
    }
    return nullptr;
  }
  case Intrinsic::get_active_lane_mask: {
    auto *Op0 = dyn_cast<ConstantInt>(Operands[0]);
    auto *Op1 = dyn_cast<ConstantInt>(Operands[1]);
    if (Op0 && Op1) {
      unsigned Lanes = FVTy->getNumElements();
      uint64_t Base = Op0->getZExtValue();
      uint64_t Limit = Op1->getZExtValue();

      SmallVector<Constant *, 16> NCs;
      for (unsigned i = 0; i < Lanes; i++) {
        if (Base + i < Limit)
          NCs.push_back(ConstantInt::getTrue(Ty));
        else
          NCs.push_back(ConstantInt::getFalse(Ty));
      }
      return ConstantVector::get(NCs);
    }
    return nullptr;
  }
  default:
    break;
  }

  for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
    // Gather a column of constants.
    for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
      // Some intrinsics use a scalar type for certain arguments.
      if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, J)) {
        Lane[J] = Operands[J];
        continue;
      }

      Constant *Agg = Operands[J]->getAggregateElement(I);
      if (!Agg)
        return nullptr;

      Lane[J] = Agg;
    }

    // Use the regular scalar folding to simplify this column.
    Constant *Folded =
        ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, Call);
    if (!Folded)
      return nullptr;
    Result[I] = Folded;
  }

  return ConstantVector::get(Result);
}
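
// ConstantFoldFixedVectorCall handles intrinsics without a dedicated case
// above purely lane-wise; e.g. llvm.ctpop on <2 x i32> <i32 7, i32 8> folds to
// <2 x i32> <i32 3, i32 1> by folding each scalar column independently.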

static Constant *ConstantFoldScalableVectorCall(
    StringRef Name, Intrinsic::ID IntrinsicID, ScalableVectorType *SVTy,
    ArrayRef<Constant *> Operands, const DataLayout &DL,
    const TargetLibraryInfo *TLI, const CallBase *Call) {
  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_convert_from_svbool: {
    auto *Src = dyn_cast<Constant>(Operands[0]);
    if (!Src || !Src->isNullValue())
      break;

    return ConstantInt::getFalse(SVTy);
  }
  default:
    break;
  }
  return nullptr;
}
3387
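// Fold a single scalar lane of llvm.frexp. For example, frexp(8.0) yields a
// mantissa of 0.5 and an exponent of 4, because 8.0 == 0.5 * 2^4.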
static std::pair<Constant *, Constant *>
ConstantFoldScalarFrexpCall(Constant *Op, Type *IntTy) {
  if (isa<PoisonValue>(Op))
    return {Op, PoisonValue::get(IntTy)};

  auto *ConstFP = dyn_cast<ConstantFP>(Op);
  if (!ConstFP)
    return {};

  const APFloat &U = ConstFP->getValueAPF();
  int FrexpExp;
  APFloat FrexpMant = frexp(U, FrexpExp, APFloat::rmNearestTiesToEven);
  Constant *Result0 = ConstantFP::get(ConstFP->getType(), FrexpMant);

  // The exponent is an "unspecified value" for inf/nan. We use zero to avoid
  // using undef.
  Constant *Result1 = FrexpMant.isFinite() ? ConstantInt::get(IntTy, FrexpExp)
                                           : ConstantInt::getNullValue(IntTy);
  return {Result0, Result1};
}

/// Handle intrinsics that return tuples, which may be tuples of vectors.
static Constant *
ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID,
                       StructType *StTy, ArrayRef<Constant *> Operands,
                       const DataLayout &DL, const TargetLibraryInfo *TLI,
                       const CallBase *Call) {

  switch (IntrinsicID) {
  case Intrinsic::frexp: {
    Type *Ty0 = StTy->getContainedType(0);
    Type *Ty1 = StTy->getContainedType(1)->getScalarType();

    if (auto *FVTy0 = dyn_cast<FixedVectorType>(Ty0)) {
      SmallVector<Constant *, 4> Results0(FVTy0->getNumElements());
      SmallVector<Constant *, 4> Results1(FVTy0->getNumElements());

      for (unsigned I = 0, E = FVTy0->getNumElements(); I != E; ++I) {
        Constant *Lane = Operands[0]->getAggregateElement(I);
        std::tie(Results0[I], Results1[I]) =
            ConstantFoldScalarFrexpCall(Lane, Ty1);
        if (!Results0[I])
          return nullptr;
      }

      return ConstantStruct::get(StTy, ConstantVector::get(Results0),
                                 ConstantVector::get(Results1));
    }

    auto [Result0, Result1] = ConstantFoldScalarFrexpCall(Operands[0], Ty1);
    if (!Result0)
      return nullptr;
    return ConstantStruct::get(StTy, Result0, Result1);
  }
  default:
    // TODO: Constant folding of vector intrinsics that fall through here does
    // not work (e.g. overflow intrinsics)
    return ConstantFoldScalarCall(Name, IntrinsicID, StTy, Operands, TLI, Call);
  }

  return nullptr;
}

} // end anonymous namespace

Constant *llvm::ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,
                                            Constant *RHS, Type *Ty,
                                            Instruction *FMFSource) {
  return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS},
                                    dyn_cast_if_present<CallBase>(FMFSource));
}

Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
                                 ArrayRef<Constant *> Operands,
                                 const TargetLibraryInfo *TLI,
                                 bool AllowNonDeterministic) {
  if (Call->isNoBuiltin())
    return nullptr;
  if (!F->hasName())
    return nullptr;

  // If this is not an intrinsic and not recognized as a library call, bail
  // out.
  Intrinsic::ID IID = F->getIntrinsicID();
  if (IID == Intrinsic::not_intrinsic) {
    if (!TLI)
      return nullptr;
    LibFunc LibF;
    if (!TLI->getLibFunc(*F, LibF))
      return nullptr;
  }

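  // The result of a libcall such as sin() is not precisely specified, and
  // different host math libraries can return slightly different values, so
  // folding it at compile time could change observable behavior.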
  // Conservatively assume that floating-point libcalls may be
  // non-deterministic.
  Type *Ty = F->getReturnType();
  if (!AllowNonDeterministic && Ty->isFPOrFPVectorTy())
    return nullptr;

  StringRef Name = F->getName();
  if (auto *FVTy = dyn_cast<FixedVectorType>(Ty))
    return ConstantFoldFixedVectorCall(
        Name, IID, FVTy, Operands, F->getDataLayout(), TLI, Call);

  if (auto *SVTy = dyn_cast<ScalableVectorType>(Ty))
    return ConstantFoldScalableVectorCall(
        Name, IID, SVTy, Operands, F->getDataLayout(), TLI, Call);

  if (auto *StTy = dyn_cast<StructType>(Ty))
    return ConstantFoldStructCall(Name, IID, StTy, Operands,
                                  F->getDataLayout(), TLI, Call);

  // TODO: If this is a library function, we already discovered that above,
  // so we should pass the LibFunc, not the name (and it might be better
  // still to separate intrinsic handling from libcalls).
  return ConstantFoldScalarCall(Name, IID, Ty, Operands, TLI, Call);
}

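// Check whether a math libcall with the given constant argument(s) is known
// not to have side effects such as setting errno, so a call whose result is
// unused can be treated as a no-op.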
bool llvm::isMathLibCallNoop(const CallBase *Call,
                             const TargetLibraryInfo *TLI) {
  // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap
  // (and to some extent ConstantFoldScalarCall).
  if (Call->isNoBuiltin() || Call->isStrictFP())
    return false;
  Function *F = Call->getCalledFunction();
  if (!F)
    return false;

  LibFunc Func;
  if (!TLI || !TLI->getLibFunc(*F, Func))
    return false;

  if (Call->arg_size() == 1) {
    if (ConstantFP *OpC = dyn_cast<ConstantFP>(Call->getArgOperand(0))) {
      const APFloat &Op = OpC->getValueAPF();
      switch (Func) {
      case LibFunc_logl:
      case LibFunc_log:
      case LibFunc_logf:
      case LibFunc_log2l:
      case LibFunc_log2:
      case LibFunc_log2f:
      case LibFunc_log10l:
      case LibFunc_log10:
      case LibFunc_log10f:
        return Op.isNaN() || (!Op.isZero() && !Op.isNegative());

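      // For double, ln(DBL_MAX) is roughly 709.78 and exp only underflows to
      // zero below about -745.13, so the bounds below stay inside that range.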
      case LibFunc_expl:
      case LibFunc_exp:
      case LibFunc_expf:
        // FIXME: These boundaries are slightly conservative.
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-745.0) || Op > APFloat(709.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-103.0f) || Op > APFloat(88.0f));
        break;

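      // Likewise for exp2: log2(DBL_MAX) is just under 1024 and exp2 only
      // underflows to zero below roughly -1075, so [-1074, 1023] is in range.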
      case LibFunc_exp2l:
      case LibFunc_exp2:
      case LibFunc_exp2f:
        // FIXME: These boundaries are slightly conservative.
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-1074.0) || Op > APFloat(1023.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-149.0f) || Op > APFloat(127.0f));
        break;

      case LibFunc_sinl:
      case LibFunc_sin:
      case LibFunc_sinf:
      case LibFunc_cosl:
      case LibFunc_cos:
      case LibFunc_cosf:
        return !Op.isInfinity();

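      // For tan, ConstantFoldFP is used below only to check whether the host
      // evaluation succeeds; the folded value itself is discarded.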
      case LibFunc_tanl:
      case LibFunc_tan:
      case LibFunc_tanf: {
        // FIXME: Stop using the host math library.
        // FIXME: The computation isn't done in the right precision.
        Type *Ty = OpC->getType();
        if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy())
          return ConstantFoldFP(tan, OpC->getValueAPF(), Ty) != nullptr;
        break;
      }

      case LibFunc_atan:
      case LibFunc_atanf:
      case LibFunc_atanl:
        // Per POSIX, this MAY fail if Op is denormal. We choose not to fail.
        return true;

      case LibFunc_asinl:
      case LibFunc_asin:
      case LibFunc_asinf:
      case LibFunc_acosl:
      case LibFunc_acos:
      case LibFunc_acosf:
        return !(Op < APFloat(Op.getSemantics(), "-1") ||
                 Op > APFloat(Op.getSemantics(), "1"));

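      // sinh and cosh overflow once |x| exceeds roughly ln(2 * DBL_MAX), about
      // 710.48 for double and 89.4 for float, hence the conservative bounds.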
      case LibFunc_sinh:
      case LibFunc_cosh:
      case LibFunc_sinhf:
      case LibFunc_coshf:
      case LibFunc_sinhl:
      case LibFunc_coshl:
        // FIXME: These boundaries are slightly conservative.
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-710.0) || Op > APFloat(710.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-89.0f) || Op > APFloat(89.0f));
        break;

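      // sqrt only raises a domain error for negative non-zero arguments; NaN,
      // zeroes (including -0.0) and positive values are all fine.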
      case LibFunc_sqrtl:
      case LibFunc_sqrt:
      case LibFunc_sqrtf:
        return Op.isNaN() || Op.isZero() || !Op.isNegative();

      // FIXME: Add more functions: sqrt_finite, atanh, expm1, log1p,
      // maybe others?
      default:
        break;
      }
    }
  }

  if (Call->arg_size() == 2) {
    ConstantFP *Op0C = dyn_cast<ConstantFP>(Call->getArgOperand(0));
    ConstantFP *Op1C = dyn_cast<ConstantFP>(Call->getArgOperand(1));
    if (Op0C && Op1C) {
      const APFloat &Op0 = Op0C->getValueAPF();
      const APFloat &Op1 = Op1C->getValueAPF();

      switch (Func) {
      case LibFunc_powl:
      case LibFunc_pow:
      case LibFunc_powf: {
        // FIXME: Stop using the host math library.
        // FIXME: The computation isn't done in the right precision.
        Type *Ty = Op0C->getType();
        if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) {
          if (Ty == Op1C->getType())
            return ConstantFoldBinaryFP(pow, Op0, Op1, Ty) != nullptr;
        }
        break;
      }

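      // fmod and remainder raise a domain error only when the first operand is
      // an infinity or the second is zero; a NaN operand simply propagates.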
      case LibFunc_fmodl:
      case LibFunc_fmod:
      case LibFunc_fmodf:
      case LibFunc_remainderl:
      case LibFunc_remainder:
      case LibFunc_remainderf:
        return Op0.isNaN() || Op1.isNaN() ||
               (!Op0.isInfinity() && !Op1.isZero());

      case LibFunc_atan2:
      case LibFunc_atan2f:
      case LibFunc_atan2l:
        // Although IEEE-754 says atan2(+/-0.0, +/-0.0) are well-defined, and
        // GLIBC and MSVC do not appear to raise an error on those, we
        // cannot rely on that behavior. POSIX and C11 say that a domain error
        // may occur, so allow for that possibility.
        return !Op0.isZero() || !Op1.isZero();

      default:
        break;
      }
    }
  }

  return false;
}

void TargetFolder::anchor() {}