From 2ec6f4070f83de1b1c356fceef01ba6bbbf19082 Mon Sep 17 00:00:00 2001 From: Antonio Frighetto Date: Wed, 22 Jan 2025 18:04:34 +0100 Subject: [PATCH] Rework `StringEncOptStack` option to use globals We never want the decoded string to live in a stack-allocated variable, as literals must remain valid throughout the lifetime of the application. This raises the question of why not use the `StringEncOptGlobal` option in the first place. The `StringEncOptStack` option allows the strings to be decoded lazily, whereas using `StringEncOptGlobal` as the default option would be detrimental to performance, since all the strings are decoded at load time. Hence, decode the string into a global variable, locally within the function, and do not decode it again if it has already been decoded. --- src/passes/string-encoding/StringEncoding.cpp | 106 ++++++++++++------ .../string-encoding/basic-aarch64-stackopt.ll | 33 ++++++ .../passes/string-encoding/config_replace.py | 2 + 3 files changed, 108 insertions(+), 33 deletions(-) create mode 100644 src/test/passes/string-encoding/basic-aarch64-stackopt.ll diff --git a/src/passes/string-encoding/StringEncoding.cpp b/src/passes/string-encoding/StringEncoding.cpp index 6f8db175..b52e32b9 100644 --- a/src/passes/string-encoding/StringEncoding.cpp +++ b/src/passes/string-encoding/StringEncoding.cpp @@ -137,8 +137,11 @@ createDecodingTrampoline(GlobalVariable &G, Use &EncPtr, Instruction *NewPt, ++It; IRBuilder IRB(&*It); - AllocaInst *ClearBuffer = - IRB.CreateAlloca(ArrayType::get(IRB.getInt8Ty(), Size)); + auto *BufferTy = ArrayType::get(IRB.getInt8Ty(), Size); + auto *M = NewPt->getModule(); + GlobalVariable *ClearBuffer = + new GlobalVariable(*M, BufferTy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(BufferTy)); AllocaInst *Key = IRB.CreateAlloca(IRB.getInt64Ty()); AllocaInst *StrSize = IRB.CreateAlloca(IRB.getInt32Ty()); @@ -172,7 +175,7 @@ createDecodingTrampoline(GlobalVariable &G, Use 
&EncPtr, Instruction *NewPt, Value *Output = Input; if (IsPartOfStackVariable) - Output = IRB.CreateInBoundsGEP(ClearBuffer->getAllocatedType(), ClearBuffer, + Output = IRB.CreateInBoundsGEP(BufferTy, ClearBuffer, {IRB.getInt64(0), IRB.getInt64(0)}); auto *NewF = @@ -205,43 +208,76 @@ createDecodingTrampoline(GlobalVariable &G, Use &EncPtr, Instruction *NewPt, ToString(*E), ToString(*V))); } - if (IsPartOfStackVariable) { - if (auto *CE = dyn_cast(EncPtr)) { - auto [First, Last] = materializeConstantExpression(NewPt, CE); - assert( - ((First != Last) || - (isa(First) || isa(First))) && - "Nested constantexpr in getelementptr/ptrtoint should not appear?"); - if (isa(First)) { - // CE is already a GEP, directly replace the operand with the decode - // output. - NewPt->setOperand(EncPtr.getOperandNo(), Output); - if (isInstructionTriviallyDead(Last)) - Last->eraseFromParent(); - } else { - Last->setOperand(0, Output); - NewPt->setOperand(EncPtr.getOperandNo(), First); - } - } else { + if (!IsPartOfStackVariable) + return IRB.CreateCall(NewF->getFunctionType(), NewF, Args); + + auto *BoolType = IRB.getInt1Ty(); + GlobalVariable *NeedDecode = + new GlobalVariable(*M, BoolType, false, GlobalValue::InternalLinkage, + ConstantInt::getFalse(BoolType)); + + It = IRB.GetInsertPoint(); + auto *WrapperType = FunctionType::get(IRB.getVoidTy(), + {IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IRB.getInt64Ty(), + IRB.getInt32Ty()}, + false); + auto *Wrapper = Function::Create(WrapperType, GlobalValue::PrivateLinkage, + "__omvll_decode_wrap", NewPt->getModule()); + + // Decode wrapper to check whether the global variable has already been + // decoded. 
+ BasicBlock *Entry = BasicBlock::Create(NewPt->getContext(), "entry", Wrapper); + IRB.SetInsertPoint(Entry); + auto *ICmp = IRB.CreateICmpEQ(IRB.CreateLoad(BoolType, Wrapper->getArg(0)), + ConstantInt::getFalse(BoolType)); + auto *NewBB = BasicBlock::Create(NewPt->getContext(), "", Wrapper); + auto *ContinuationBB = BasicBlock::Create(NewPt->getContext(), "", Wrapper); + IRB.CreateCondBr(ICmp, NewBB, ContinuationBB); + IRB.SetInsertPoint(NewBB); + auto *CI = IRB.CreateCall(NewF->getFunctionType(), NewF, + {Wrapper->getArg(1), Wrapper->getArg(2), + Wrapper->getArg(3), Wrapper->getArg(4)}); + IRB.CreateStore(ConstantInt::getTrue(BoolType), Wrapper->getArg(0)); + IRB.CreateBr(ContinuationBB); + IRB.SetInsertPoint(ContinuationBB); + IRB.CreateRetVoid(); + + // Insert decode wrapper call site in the caller. + IRB.SetInsertPoint(NewPt->getParent(), It); + CI = IRB.CreateCall(Wrapper->getFunctionType(), Wrapper, + {NeedDecode, Output, Input, KeyVal, VStrSize}); + + if (auto *CE = dyn_cast(EncPtr)) { + auto [First, Last] = materializeConstantExpression(NewPt, CE); + assert(((First != Last) || + (isa(First) || isa(First))) && + "Nested constantexpr in getelementptr/ptrtoint should not appear?"); + if (isa(First)) { + // CE is already a GEP, directly replace the operand with the decode + // output. 
NewPt->setOperand(EncPtr.getOperandNo(), Output); + if (isInstructionTriviallyDead(Last)) + Last->eraseFromParent(); + } else { + Last->setOperand(0, Output); + NewPt->setOperand(EncPtr.getOperandNo(), First); } + } else { + NewPt->setOperand(EncPtr.getOperandNo(), Output); } - return IRB.CreateCall(NewF->getFunctionType(), NewF, Args); + return CI; } bool StringEncoding::encodeStrings(Function &F, ObfuscationConfig &UserConfig) { bool Changed = false; llvm::Module *M = F.getParent(); - for (Instruction &I : instructions(F)) { - if (isa(I)) { - SWARN("{} contains Phi node which could raise issues!", - demangle(F.getName().str())); - continue; - } + for (Instruction &I : make_early_inc_range(instructions(F))) { + assert(!isa(I) && "Found phi previously demoted?"); - for (Use &Op : I.operands()) { + for (Use &Op : make_early_inc_range(I.operands())) { auto *G = dyn_cast(Op->stripPointerCasts()); // Is the operand a constant expression? @@ -320,13 +356,17 @@ PreservedAnalyses StringEncoding::run(Module &M, ModuleAnalysisManager &MAM) { RNG = M.createRNG(name()); std::vector ToVisit; - for (Function &F : M) + for (Function &F : M) { + if (F.empty() || F.isDeclaration()) + continue; + + demotePHINode(F); ToVisit.emplace_back(&F); + } for (Function *F : ToVisit) { - demotePHINode(*F); - std::string DemangledFName = demangle(F->getName().str()); - SDEBUG("[{}] Visiting function {}", name(), DemangledFName); + std::string Name = demangle(F->getName().str()); + SDEBUG("[{}] Visiting function {}", name(), Name); Changed |= encodeStrings(*F, *UserConfig); } diff --git a/src/test/passes/string-encoding/basic-aarch64-stackopt.ll b/src/test/passes/string-encoding/basic-aarch64-stackopt.ll new file mode 100644 index 00000000..4956aa42 --- /dev/null +++ b/src/test/passes/string-encoding/basic-aarch64-stackopt.ll @@ -0,0 +1,33 @@ +; +; This file is distributed under the Apache License v2.0. See LICENSE for details. 
+; + +; REQUIRES: aarch64-registered-target + +; RUN: env OMVLL_CONFIG=%S/config_replace.py clang++ -fpass-plugin=%libOMVLL \ +; RUN: -target arm64-apple-ios17.5.0 -S -emit-llvm -O0 -c %s -o - | FileCheck %s +; +; RUN: env OMVLL_CONFIG=%S/config_replace.py clang++ -fpass-plugin=%libOMVLL \ +; RUN: -target aarch64-linux-android -S -emit-llvm -O0 -c %s -o - | FileCheck %s +; +; CHECK-NOT: {{.*Hello, Stack.*}} + +@__const.main.Hello = private constant [13 x i8] c"Hello, Stack\00", align 1 + +define void @test() { +; CHECK-LABEL: @test( +; CHECK: %5 = getelementptr inbounds [13 x i8], ptr @0, i64 0, i64 0 +; CHECK-NEXT: %6 = load i1, ptr @1, align 1 +; CHECK-NEXT: %7 = icmp eq i1 %6, false +; CHECK-NEXT: br i1 %7, label %8, label %__omvll_decode_wrap.exit +; CHECK: 8: +; CHECK-NEXT: call void @__omvll_decode(ptr %5, ptr @__const.main.Hello, i64 %3, i32 %4) +; CHECK-NEXT: store i1 true, ptr @1, align 1 +; CHECK-NEXT: br label %__omvll_decode_wrap.exit +; CHECK: __omvll_decode_wrap.exit: +; CHECK-NEXT: %puts = call i32 @puts(ptr %5) + %puts = call i32 @puts(ptr @__const.main.Hello) + ret void +} + +declare i32 @puts(ptr) diff --git a/src/test/passes/string-encoding/config_replace.py b/src/test/passes/string-encoding/config_replace.py index a3dc1272..0e5566e0 100644 --- a/src/test/passes/string-encoding/config_replace.py +++ b/src/test/passes/string-encoding/config_replace.py @@ -15,6 +15,8 @@ def obfuscate_string(self, _, __, string: bytes): return omvll.StringEncOptGlobal() if string.endswith(b"Swift"): return omvll.StringEncOptStack() + if string.endswith(b"Stack"): + return omvll.StringEncOptStack() @lru_cache(maxsize=1) def omvll_get_config() -> omvll.ObfuscationConfig: