From cd82db9422f44d47061194c8b69851effe20693b Mon Sep 17 00:00:00 2001 From: chinosk <2248589280@qq.com> Date: Tue, 2 Jul 2024 19:21:36 +0800 Subject: [PATCH] split generic text --- app/src/main/cpp/GakumasLocalify/Local.cpp | 208 ++++++++++++++++-- .../cpp/deps/UnityResolve/UnityResolve.hpp | 19 ++ 2 files changed, 211 insertions(+), 16 deletions(-) diff --git a/app/src/main/cpp/GakumasLocalify/Local.cpp b/app/src/main/cpp/GakumasLocalify/Local.cpp index 1c9ee87..a073b63 100644 --- a/app/src/main/cpp/GakumasLocalify/Local.cpp +++ b/app/src/main/cpp/GakumasLocalify/Local.cpp @@ -11,6 +11,9 @@ #include #include #include +#include +#include +#include #include "BaseDefine.h" @@ -19,6 +22,8 @@ namespace GakumasLocal::Local { std::unordered_map i18nDumpData{}; std::unordered_map genericText{}; std::vector genericTextDumpData{}; + std::vector genericSplittedDumpData{}; + std::vector genericOrigTextDumpData{}; std::unordered_set translatedText{}; int genericDumpFileIndex = 0; @@ -26,6 +31,48 @@ namespace GakumasLocal::Local { return Plugin::GetInstance().GetHookInstaller()->localizationFilesDir; } + std::string trim(const std::string& str) { + auto is_not_space = [](char ch) { return !std::isspace(ch); }; + auto start = std::ranges::find_if(str, is_not_space); + auto end = std::ranges::find_if(str | std::views::reverse, is_not_space).base(); + + if (start < end) { + return {start, end}; + } + return ""; + } + + std::string findInMapIgnoreSpace(const std::string& key, const std::unordered_map& searchMap) { + auto is_space = [](char ch) { return std::isspace(ch); }; + auto front = std::ranges::find_if_not(key, is_space); + auto back = std::ranges::find_if_not(key | std::views::reverse, is_space).base(); + + std::string prefix(key.begin(), front); + std::string suffix(back, key.end()); + + std::string trimmedKey = trim(key); + if ( auto it = searchMap.find(trimmedKey); it != searchMap.end()) { + return prefix + it->second + suffix; + } + else { + return ""; + } + } + + enum class DumpStrStat { + DEFAULT = 0, + SPLITTABLE_ORIG = 1, + SPLITTED = 2 + }; + + enum class SplitTagsTranslationStat { + NO_TRANS, + PART_TRANS, + FULL_TRANS, + NO_SPLIT, + NO_SPLIT_AND_EMPTY + }; + void LoadJsonDataToMap(const std::filesystem::path& filePath, std::unordered_map& dict, const bool insertToTranslated = false, const bool needClearDict = true) { if (!exists(filePath)) return; @@ -84,7 +131,7 @@ namespace GakumasLocal::Local { } void DumpVectorDataToJson(const std::filesystem::path& dumpBasePath, const std::filesystem::path& fileName, - const std::vector& vec) { + const std::vector& vec, const std::string& valuePrefix = "") { const auto dumpFilePath = dumpBasePath / fileName; try { if (!is_directory(dumpBasePath)) { @@ -101,7 +148,12 @@ namespace GakumasLocal::Local { dumpLrcFile.close(); auto fileData = nlohmann::ordered_json::parse(fileContent); for (const auto& i : vec) { - fileData[i] = i; + if (!valuePrefix.empty()) { + fileData[i] = valuePrefix + i; + } + else { + fileData[i] = i; + } } const auto newStr = fileData.dump(4, 32, false); std::ofstream dumpWriteLrcFile(dumpFilePath, std::ofstream::out); @@ -199,6 +251,87 @@ namespace GakumasLocal::Local { return ret; } + SplitTagsTranslationStat GetSplitTagsTranslationFull(const std::string& origTextIn, std::string* newText, std::vector& unTransResultRet) { + // static const std::u16string splitFlags = u"0123456789++--%%【】."; + static const std::unordered_set splitFlags = {u'0', u'1', u'2', u'3', u'4', u'5', + u'6', u'7', u'8', u'9', u'+', u'+', + u'-', u'-', u'%', u'%', u'【', u'】', + u'.', u':', u':', u'×'}; + + const auto origText = Misc::ToUTF16(origTextIn); + bool isInTag = false; + std::vector waitingReplaceTexts{}; + + std::u16string currentWaitingReplaceText; + +#define checkCurrentWaitingReplaceTextAndClear() \ + if (!currentWaitingReplaceText.empty()) { \ + waitingReplaceTexts.push_back(Misc::ToUTF8(currentWaitingReplaceText)); \ + currentWaitingReplaceText.clear(); } + + for (char16_t currChar : origText) { + if (currChar == u'<') { + isInTag = true; + } + if (currChar == u'>') { + isInTag = false; + checkCurrentWaitingReplaceTextAndClear() + continue; + } + if (isInTag) { + checkCurrentWaitingReplaceTextAndClear() + continue; + } + + if (!splitFlags.contains(currChar)) { + currentWaitingReplaceText.push_back(currChar); + } + else { + checkCurrentWaitingReplaceTextAndClear() + } + } + if (waitingReplaceTexts.empty()) { + if (currentWaitingReplaceText.empty()) { + return SplitTagsTranslationStat::NO_SPLIT_AND_EMPTY; + } + else { + return SplitTagsTranslationStat::NO_SPLIT; + } + } + checkCurrentWaitingReplaceTextAndClear() + + *newText = origTextIn; + SplitTagsTranslationStat ret; + bool hasTrans = false; + bool hasNotTrans = false; + if (!waitingReplaceTexts.empty()) { + for (const auto& i : waitingReplaceTexts) { + const auto searchResult = findInMapIgnoreSpace(i, genericText); + if (!searchResult.empty()) { + ReplaceString(newText, i, searchResult); + hasTrans = true; + } + else { + unTransResultRet.emplace_back(trim(i)); + hasNotTrans = true; + } + } + if (hasTrans && hasNotTrans) { + ret = SplitTagsTranslationStat::PART_TRANS; + } + else if (hasTrans && !hasNotTrans) { + ret = SplitTagsTranslationStat::FULL_TRANS; + } + else { + ret = SplitTagsTranslationStat::NO_TRANS; + } + } + else { + ret = SplitTagsTranslationStat::NO_TRANS; + } + return ret; + } + void LoadData() { static auto localizationFile = GetBasePath() / "local-files" / "localization.json"; static auto genericFile = GetBasePath() / "local-files" / "generic.json"; @@ -215,7 +348,7 @@ namespace GakumasLocal::Local { if (std::filesystem::exists(genericDir) || std::filesystem::is_directory(genericDir)) { for (const auto& entry : std::filesystem::recursive_directory_iterator(genericDir)) { if (std::filesystem::is_regular_file(entry.path())) { - const auto currFile = entry.path(); + const auto& currFile = entry.path(); if (to_lower(currFile.extension().string()) == ".json") { LoadJsonDataToMap(currFile, genericText, true, false); } @@ -285,29 +418,47 @@ namespace GakumasLocal::Local { return false; } - std::string GetDumpGenericFileName() { - if (genericDumpFileIndex == 0) return "generic.json"; - return Log::StringFormat("generic_%d.json", genericDumpFileIndex); + std::string GetDumpGenericFileName(DumpStrStat stat = DumpStrStat::DEFAULT) { + if (stat == DumpStrStat::SPLITTABLE_ORIG) { + if (genericDumpFileIndex == 0) return "generic_orig.json"; + return Log::StringFormat("generic_orig_%d.json", genericDumpFileIndex); + } + else { + if (genericDumpFileIndex == 0) return "generic.json"; + return Log::StringFormat("generic_%d.json", genericDumpFileIndex); + } } bool inDumpGeneric = false; - void DumpGenericText(const std::string& origText) { + void DumpGenericText(const std::string& origText, DumpStrStat stat = DumpStrStat::DEFAULT) { if (translatedText.contains(origText)) return; - if (std::find(genericTextDumpData.begin(), genericTextDumpData.end(), origText) != genericTextDumpData.end()) { + std::array>, 3> targets = { + genericTextDumpData, + genericOrigTextDumpData, + genericSplittedDumpData + }; + + auto& appendTarget = targets[static_cast(stat)].get(); + + if (std::find(appendTarget.begin(), appendTarget.end(), origText) != appendTarget.end()) { return; } if (IsPureStringValue(origText)) return; - genericTextDumpData.push_back(origText); + appendTarget.push_back(origText); static auto dumpBasePath = GetBasePath() / "dump-files"; if (inDumpGeneric) return; inDumpGeneric = true; std::thread([](){ std::this_thread::sleep_for(std::chrono::seconds(5)); - DumpVectorDataToJson(dumpBasePath, GetDumpGenericFileName(), genericTextDumpData); + DumpVectorDataToJson(dumpBasePath, GetDumpGenericFileName(DumpStrStat::DEFAULT), genericTextDumpData); + DumpVectorDataToJson(dumpBasePath, GetDumpGenericFileName(DumpStrStat::SPLITTABLE_ORIG), genericOrigTextDumpData); + DumpVectorDataToJson(dumpBasePath, GetDumpGenericFileName(DumpStrStat::SPLITTED), genericSplittedDumpData, "[split]"); genericTextDumpData.clear(); + genericSplittedDumpData.clear(); + genericOrigTextDumpData.clear(); inDumpGeneric = false; }).detach(); } @@ -318,25 +469,50 @@ namespace GakumasLocal::Local { return true; } + auto ret = false; + std::vector unTransResultRet; - if (GetSplitTagsTranslation(origText, newStr, unTransResultRet)) { - return true; + const auto splitTransStat = GetSplitTagsTranslationFull(origText, newStr, unTransResultRet); + switch (splitTransStat) { + case SplitTagsTranslationStat::FULL_TRANS: { + return true; + } break; + + case SplitTagsTranslationStat::NO_SPLIT_AND_EMPTY: { + return false; + } break; + + case SplitTagsTranslationStat::NO_SPLIT: { + ret = false; + } break; + + case SplitTagsTranslationStat::NO_TRANS: { + ret = false; + } break; + + case SplitTagsTranslationStat::PART_TRANS: { + ret = true; + } break; } if (!Config::dumpText) { - return false; + return ret; } - if (unTransResultRet.empty()) { + if (unTransResultRet.empty() || (splitTransStat == SplitTagsTranslationStat::NO_SPLIT)) { DumpGenericText(origText); } else { for (const auto& i : unTransResultRet) { - DumpGenericText(i); + DumpGenericText(i, DumpStrStat::SPLITTED); } + // 若未翻译部分长度为1,且未翻译文本等于原文本,则不 dump 到原文本文件 + //if (unTransResultRet.size() != 1 || unTransResultRet[0] != origText) { + DumpGenericText(origText, DumpStrStat::SPLITTABLE_ORIG); + //} } - return false; + return ret; } std::string ChangeDumpTextIndex(int changeValue) { diff --git a/app/src/main/cpp/deps/UnityResolve/UnityResolve.hpp b/app/src/main/cpp/deps/UnityResolve/UnityResolve.hpp index e49b779..3b3fb5f 100644 --- a/app/src/main/cpp/deps/UnityResolve/UnityResolve.hpp +++ b/app/src/main/cpp/deps/UnityResolve/UnityResolve.hpp @@ -1393,6 +1393,25 @@ class UnityResolve final { } } + [[nodiscard]] auto ToWString() const -> std::u16string { +#if WINDOWS_MODE + if (IsBadReadPtr(this, sizeof(String))) return {}; + if (IsBadReadPtr(m_firstChar, m_stringLength)) return {}; +#endif + if (!this) return {}; + try { + // using convert_typeX = std::codecvt_utf8; + // std::wstring_convert converterX; + // return converterX.to_bytes(m_firstChar); + return {chars}; + } + catch (std::exception& e) { + std::cout << "String Invoke Error\n"; + GakumasLocal::Log::ErrorFmt("String Invoke Error: %s", e.what()); + return {}; + } + } + auto operator=(const std::string& newString) const -> String* { return New(newString); } auto operator==(const std::wstring& newString) const -> bool { return Equals(newString); }