Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
split generic text
Browse files Browse the repository at this point in the history
chinosk6 committed Jul 2, 2024
1 parent c28e05e commit cd82db9
Showing 2 changed files with 211 additions and 16 deletions.
208 changes: 192 additions & 16 deletions app/src/main/cpp/GakumasLocalify/Local.cpp
Original file line number Diff line number Diff line change
@@ -11,6 +11,9 @@
#include <thread>
#include <regex>
#include <ranges>
#include <string>
#include <cctype>
#include <algorithm>
#include "BaseDefine.h"


@@ -19,13 +22,57 @@ namespace GakumasLocal::Local {
std::unordered_map<std::string, std::string> i18nDumpData{};
std::unordered_map<std::string, std::string> genericText{};
std::vector<std::string> genericTextDumpData{};
std::vector<std::string> genericSplittedDumpData{};
std::vector<std::string> genericOrigTextDumpData{};
std::unordered_set<std::string> translatedText{};
int genericDumpFileIndex = 0;

std::filesystem::path GetBasePath() {
return Plugin::GetInstance().GetHookInstaller()->localizationFilesDir;
}

std::string trim(const std::string& str) {
auto is_not_space = [](char ch) { return !std::isspace(ch); };
auto start = std::ranges::find_if(str, is_not_space);
auto end = std::ranges::find_if(str | std::views::reverse, is_not_space).base();

if (start < end) {
return {start, end};
}
return "";
}

std::string findInMapIgnoreSpace(const std::string& key, const std::unordered_map<std::string, std::string>& searchMap) {
auto is_space = [](char ch) { return std::isspace(ch); };
auto front = std::ranges::find_if_not(key, is_space);
auto back = std::ranges::find_if_not(key | std::views::reverse, is_space).base();

std::string prefix(key.begin(), front);
std::string suffix(back, key.end());

std::string trimmedKey = trim(key);
if ( auto it = searchMap.find(trimmedKey); it != searchMap.end()) {
return prefix + it->second + suffix;
}
else {
return "";
}
}

enum class DumpStrStat {
DEFAULT = 0,
SPLITTABLE_ORIG = 1,
SPLITTED = 2
};

enum class SplitTagsTranslationStat {
NO_TRANS,
PART_TRANS,
FULL_TRANS,
NO_SPLIT,
NO_SPLIT_AND_EMPTY
};

void LoadJsonDataToMap(const std::filesystem::path& filePath, std::unordered_map<std::string, std::string>& dict,
const bool insertToTranslated = false, const bool needClearDict = true) {
if (!exists(filePath)) return;
@@ -84,7 +131,7 @@ namespace GakumasLocal::Local {
}

void DumpVectorDataToJson(const std::filesystem::path& dumpBasePath, const std::filesystem::path& fileName,
const std::vector<std::string>& vec) {
const std::vector<std::string>& vec, const std::string& valuePrefix = "") {
const auto dumpFilePath = dumpBasePath / fileName;
try {
if (!is_directory(dumpBasePath)) {
@@ -101,7 +148,12 @@ namespace GakumasLocal::Local {
dumpLrcFile.close();
auto fileData = nlohmann::ordered_json::parse(fileContent);
for (const auto& i : vec) {
fileData[i] = i;
if (!valuePrefix.empty()) {
fileData[i] = valuePrefix + i;
}
else {
fileData[i] = i;
}
}
const auto newStr = fileData.dump(4, 32, false);
std::ofstream dumpWriteLrcFile(dumpFilePath, std::ofstream::out);
@@ -199,6 +251,87 @@ namespace GakumasLocal::Local {
return ret;
}

SplitTagsTranslationStat GetSplitTagsTranslationFull(const std::string& origTextIn, std::string* newText, std::vector<std::string>& unTransResultRet) {
// static const std::u16string splitFlags = u"0123456789++--%%【】.";
static const std::unordered_set<char16_t> splitFlags = {u'0', u'1', u'2', u'3', u'4', u'5',
u'6', u'7', u'8', u'9', u'+', u'',
u'-', u'', u'%', u'', u'', u'',
u'.', u':', u'', u'×'};

const auto origText = Misc::ToUTF16(origTextIn);
bool isInTag = false;
std::vector<std::string> waitingReplaceTexts{};

std::u16string currentWaitingReplaceText;

#define checkCurrentWaitingReplaceTextAndClear() \
if (!currentWaitingReplaceText.empty()) { \
waitingReplaceTexts.push_back(Misc::ToUTF8(currentWaitingReplaceText)); \
currentWaitingReplaceText.clear(); }

for (char16_t currChar : origText) {
if (currChar == u'<') {
isInTag = true;
}
if (currChar == u'>') {
isInTag = false;
checkCurrentWaitingReplaceTextAndClear()
continue;
}
if (isInTag) {
checkCurrentWaitingReplaceTextAndClear()
continue;
}

if (!splitFlags.contains(currChar)) {
currentWaitingReplaceText.push_back(currChar);
}
else {
checkCurrentWaitingReplaceTextAndClear()
}
}
if (waitingReplaceTexts.empty()) {
if (currentWaitingReplaceText.empty()) {
return SplitTagsTranslationStat::NO_SPLIT_AND_EMPTY;
}
else {
return SplitTagsTranslationStat::NO_SPLIT;
}
}
checkCurrentWaitingReplaceTextAndClear()

*newText = origTextIn;
SplitTagsTranslationStat ret;
bool hasTrans = false;
bool hasNotTrans = false;
if (!waitingReplaceTexts.empty()) {
for (const auto& i : waitingReplaceTexts) {
const auto searchResult = findInMapIgnoreSpace(i, genericText);
if (!searchResult.empty()) {
ReplaceString(newText, i, searchResult);
hasTrans = true;
}
else {
unTransResultRet.emplace_back(trim(i));
hasNotTrans = true;
}
}
if (hasTrans && hasNotTrans) {
ret = SplitTagsTranslationStat::PART_TRANS;
}
else if (hasTrans && !hasNotTrans) {
ret = SplitTagsTranslationStat::FULL_TRANS;
}
else {
ret = SplitTagsTranslationStat::NO_TRANS;
}
}
else {
ret = SplitTagsTranslationStat::NO_TRANS;
}
return ret;
}

void LoadData() {
static auto localizationFile = GetBasePath() / "local-files" / "localization.json";
static auto genericFile = GetBasePath() / "local-files" / "generic.json";
@@ -215,7 +348,7 @@ namespace GakumasLocal::Local {
if (std::filesystem::exists(genericDir) || std::filesystem::is_directory(genericDir)) {
for (const auto& entry : std::filesystem::recursive_directory_iterator(genericDir)) {
if (std::filesystem::is_regular_file(entry.path())) {
const auto currFile = entry.path();
const auto& currFile = entry.path();
if (to_lower(currFile.extension().string()) == ".json") {
LoadJsonDataToMap(currFile, genericText, true, false);
}
@@ -285,29 +418,47 @@ namespace GakumasLocal::Local {
return false;
}

std::string GetDumpGenericFileName() {
if (genericDumpFileIndex == 0) return "generic.json";
return Log::StringFormat("generic_%d.json", genericDumpFileIndex);
std::string GetDumpGenericFileName(DumpStrStat stat = DumpStrStat::DEFAULT) {
if (stat == DumpStrStat::SPLITTABLE_ORIG) {
if (genericDumpFileIndex == 0) return "generic_orig.json";
return Log::StringFormat("generic_orig_%d.json", genericDumpFileIndex);
}
else {
if (genericDumpFileIndex == 0) return "generic.json";
return Log::StringFormat("generic_%d.json", genericDumpFileIndex);
}
}

bool inDumpGeneric = false;
void DumpGenericText(const std::string& origText) {
void DumpGenericText(const std::string& origText, DumpStrStat stat = DumpStrStat::DEFAULT) {
if (translatedText.contains(origText)) return;

if (std::find(genericTextDumpData.begin(), genericTextDumpData.end(), origText) != genericTextDumpData.end()) {
std::array<std::reference_wrapper<std::vector<std::string>>, 3> targets = {
genericTextDumpData,
genericOrigTextDumpData,
genericSplittedDumpData
};

auto& appendTarget = targets[static_cast<int>(stat)].get();

if (std::find(appendTarget.begin(), appendTarget.end(), origText) != appendTarget.end()) {
return;
}
if (IsPureStringValue(origText)) return;

genericTextDumpData.push_back(origText);
appendTarget.push_back(origText);
static auto dumpBasePath = GetBasePath() / "dump-files";

if (inDumpGeneric) return;
inDumpGeneric = true;
std::thread([](){
std::this_thread::sleep_for(std::chrono::seconds(5));
DumpVectorDataToJson(dumpBasePath, GetDumpGenericFileName(), genericTextDumpData);
DumpVectorDataToJson(dumpBasePath, GetDumpGenericFileName(DumpStrStat::DEFAULT), genericTextDumpData);
DumpVectorDataToJson(dumpBasePath, GetDumpGenericFileName(DumpStrStat::SPLITTABLE_ORIG), genericOrigTextDumpData);
DumpVectorDataToJson(dumpBasePath, GetDumpGenericFileName(DumpStrStat::SPLITTED), genericSplittedDumpData, "[split]");
genericTextDumpData.clear();
genericSplittedDumpData.clear();
genericOrigTextDumpData.clear();
inDumpGeneric = false;
}).detach();
}
@@ -318,25 +469,50 @@ namespace GakumasLocal::Local {
return true;
}

auto ret = false;

std::vector<std::string> unTransResultRet;
if (GetSplitTagsTranslation(origText, newStr, unTransResultRet)) {
return true;
const auto splitTransStat = GetSplitTagsTranslationFull(origText, newStr, unTransResultRet);
switch (splitTransStat) {
case SplitTagsTranslationStat::FULL_TRANS: {
return true;
} break;

case SplitTagsTranslationStat::NO_SPLIT_AND_EMPTY: {
return false;
} break;

case SplitTagsTranslationStat::NO_SPLIT: {
ret = false;
} break;

case SplitTagsTranslationStat::NO_TRANS: {
ret = false;
} break;

case SplitTagsTranslationStat::PART_TRANS: {
ret = true;
} break;
}

if (!Config::dumpText) {
return false;
return ret;
}

if (unTransResultRet.empty()) {
if (unTransResultRet.empty() || (splitTransStat == SplitTagsTranslationStat::NO_SPLIT)) {
DumpGenericText(origText);
}
else {
for (const auto& i : unTransResultRet) {
DumpGenericText(i);
DumpGenericText(i, DumpStrStat::SPLITTED);
}
// 若未翻译部分长度为1,且未翻译文本等于原文本,则不 dump 到原文本文件
//if (unTransResultRet.size() != 1 || unTransResultRet[0] != origText) {
DumpGenericText(origText, DumpStrStat::SPLITTABLE_ORIG);
//}
}

return false;
return ret;
}

std::string ChangeDumpTextIndex(int changeValue) {
19 changes: 19 additions & 0 deletions app/src/main/cpp/deps/UnityResolve/UnityResolve.hpp
Original file line number Diff line number Diff line change
@@ -1393,6 +1393,25 @@ class UnityResolve final {
}
}

[[nodiscard]] auto ToWString() const -> std::u16string {
#if WINDOWS_MODE
if (IsBadReadPtr(this, sizeof(String))) return {};
if (IsBadReadPtr(m_firstChar, m_stringLength)) return {};
#endif
if (!this) return {};
try {
// using convert_typeX = std::codecvt_utf8<wchar_t>;
// std::wstring_convert<convert_typeX> converterX;
// return converterX.to_bytes(m_firstChar);
return {chars};
}
catch (std::exception& e) {
std::cout << "String Invoke Error\n";
GakumasLocal::Log::ErrorFmt("String Invoke Error: %s", e.what());
return {};
}
}

auto operator=(const std::string& newString) const -> String* { return New(newString); }

auto operator==(const std::wstring& newString) const -> bool { return Equals(newString); }

0 comments on commit cd82db9

Please sign in to comment.