-
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #48 from BianchTech/46-refactor-move-string-manipu…
…lation-code-to-a-utility-folder refactor(string_operations.h/cpp): add new util operations
- Loading branch information
Showing
7 changed files
with
116 additions
and
129 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
#include <unicode/locid.h> | ||
#include <unicode/unistr.h> | ||
#include <unicode/ustream.h> | ||
#include <codecvt> | ||
#include <locale> | ||
#include <string> | ||
#include <unordered_map> | ||
#include <vector> | ||
|
||
namespace utils { | ||
|
||
/**
 * @var DELIMITER
 * @brief Default delimiter used to split strings (a single space).
 *
 * Declared `inline const` instead of `constexpr`: std::string is not a
 * literal type in C++17, and a namespace-scope constexpr std::string is not
 * reliably accepted by every standard library even in C++20. `inline` gives
 * this header-defined object a single definition shared by all translation
 * units.
 */
inline const std::string DELIMITER = " ";
|
||
/**
 * @brief Mapping to normalize accented characters to their ASCII
 * equivalents.
 *
 * Keys are the accented Latin letters (lower and upper case, plus c-cedilla)
 * and values are the matching unaccented letter with the same case.
 *
 * Declared `inline` so that every translation unit including this header
 * shares one table instead of constructing a private copy at start-up.
 */
inline const std::unordered_map<wchar_t, wchar_t> accentMap_ = {
    {L'á', L'a'}, {L'à', L'a'}, {L'â', L'a'}, {L'ã', L'a'}, {L'ä', L'a'},
    {L'é', L'e'}, {L'è', L'e'}, {L'ê', L'e'}, {L'ë', L'e'}, {L'í', L'i'},
    {L'ì', L'i'}, {L'î', L'i'}, {L'ï', L'i'}, {L'ó', L'o'}, {L'ò', L'o'},
    {L'ô', L'o'}, {L'õ', L'o'}, {L'ö', L'o'}, {L'ú', L'u'}, {L'ù', L'u'},
    {L'û', L'u'}, {L'ü', L'u'}, {L'ç', L'c'}, {L'Á', L'A'}, {L'À', L'A'},
    {L'Â', L'A'}, {L'Ã', L'A'}, {L'Ä', L'A'}, {L'É', L'E'}, {L'È', L'E'},
    {L'Ê', L'E'}, {L'Ë', L'E'}, {L'Í', L'I'}, {L'Ì', L'I'}, {L'Î', L'I'},
    {L'Ï', L'I'}, {L'Ó', L'O'}, {L'Ò', L'O'}, {L'Ô', L'O'}, {L'Õ', L'O'},
    {L'Ö', L'O'}, {L'Ú', L'U'}, {L'Ù', L'U'}, {L'Û', L'U'}, {L'Ü', L'U'},
    {L'Ç', L'C'}};
// String manipulation functions | ||
|
||
/** | ||
* @brief Converts a string to lower case in place. | ||
* | ||
* The bytes are interpreted as UTF-8 and the lower-cased text is written | ||
* back as UTF-8. | ||
* | ||
* NOTE(review): despite the name, the definition in string_operations.cpp | ||
* only lower-cases; it does not remove spaces or any other characters. | ||
* | ||
* @param input String to process; a null pointer is ignored. | ||
*/ | ||
void shrink_string(std::string* input); | ||
|
||
/** | ||
* @brief Splits a string on every occurrence of a delimiter. | ||
* | ||
* Empty tokens are kept, so adjacent delimiters or a trailing delimiter | ||
* produce empty strings in the result. | ||
* | ||
* @param s String to split. It is modified by the call: on return it holds | ||
* only the text that follows the last delimiter (the final token). | ||
* @param delimiter Delimiter to split the string; must not be empty. | ||
* @return Vector of strings resulting from the split, in order of | ||
* appearance; it always contains at least one element. | ||
*/ | ||
std::vector<std::string> split(std::string& s, const std::string& delimiter); | ||
|
||
/** | ||
* @brief Removes accents from a string. | ||
* | ||
* Only the characters listed in accentMap_ are replaced; every other | ||
* character is copied to the result unchanged. | ||
* | ||
* @param input The input string, interpreted as UTF-8. | ||
* @return The UTF-8 string without the mapped accents. | ||
*/ | ||
std::string removeAccents(const std::string& input); | ||
|
||
} // namespace utils |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
#include "utils/string_operations.h" | ||
|
||
namespace utils { | ||
|
||
void shrink_string(std::string* input) { | ||
if (!input) | ||
return; // Verifica se o ponteiro é válido | ||
|
||
icu::UnicodeString ustr(input->c_str(), "UTF-8"); | ||
ustr.toLower(); | ||
std::string result; | ||
ustr.toUTF8String(result); | ||
*input = result; | ||
} | ||
|
||
/**
 * @brief Splits a string on every occurrence of a delimiter.
 *
 * Empty tokens are kept, so adjacent or trailing delimiters yield empty
 * strings. The scan advances an offset instead of erasing the front of the
 * string on every match, which keeps the work linear in the input size.
 *
 * @param s String to split. On return it holds only the text after the last
 *          delimiter (the final token); it is left untouched when no
 *          delimiter is found or when the delimiter is empty.
 * @param delimiter Delimiter to split on. An empty delimiter cannot advance
 *          the scan, so the whole string is returned as a single token.
 * @return Tokens in order of appearance; always at least one element.
 */
std::vector<std::string> split(std::string& s, const std::string& delimiter) {
  std::vector<std::string> tokens;

  // An empty delimiter matches at offset 0 without consuming anything, which
  // previously made the loop spin forever.
  if (delimiter.empty()) {
    tokens.push_back(s);
    return tokens;
  }

  std::string::size_type start = 0;
  for (std::string::size_type hit = s.find(delimiter, start);
       hit != std::string::npos; hit = s.find(delimiter, start)) {
    tokens.emplace_back(s, start, hit - start);
    start = hit + delimiter.length();
  }

  // Keep the long-standing side effect callers may rely on: `s` ends up
  // holding just the last token. One erase replaces one erase per token.
  s.erase(0, start);
  tokens.push_back(s);

  return tokens;
}
|
||
std::string removeAccents(const std::string& input) { | ||
std::wstring winput = | ||
std::wstring_convert<std::codecvt_utf8<wchar_t>>().from_bytes(input); | ||
std::wstring woutput; | ||
woutput.reserve(winput.size()); // Evitar alocações desnecessárias | ||
|
||
// Processar a string como wstring | ||
for (wchar_t ch : winput) { | ||
if (accentMap_.count(ch)) { | ||
woutput.push_back(accentMap_.at(ch)); // Substituir acentuados | ||
} else { | ||
woutput.push_back(ch); // Mantém o caractere não acentuado | ||
} | ||
} | ||
|
||
// Converter de volta para std::string | ||
return std::wstring_convert<std::codecvt_utf8<wchar_t>>().to_bytes(woutput); | ||
} | ||
|
||
} // namespace utils |