diff --git a/src/util/stringutil.cpp b/src/util/stringutil.cpp index c494a759..8f3bddd8 100644 --- a/src/util/stringutil.cpp +++ b/src/util/stringutil.cpp @@ -141,6 +141,19 @@ extern uint32_t util::decode_utf8(uint& size, const char* chr) { return code; } +size_t util::crop_utf8(std::string_view s, size_t maxSize) { + size_t pos = 0; + uint size = 0; + while (pos < s.length()) { + decode_utf8(size, &s.at(pos)); + if (pos + size > maxSize) { + return pos; + } + pos += size; + } + return pos; +} + std::string util::wstr2str_utf8(const std::wstring& ws) { std::vector chars; char buffer[4]; diff --git a/src/util/stringutil.hpp b/src/util/stringutil.hpp index 54a0a5e2..14ba9432 100644 --- a/src/util/stringutil.hpp +++ b/src/util/stringutil.hpp @@ -17,8 +17,23 @@ namespace util { uint encode_utf8(uint32_t c, ubyte* bytes); uint32_t decode_utf8(uint& size, const char* bytes); + + /// @brief Encode raw wstring to UTF-8 + /// @param ws source raw wstring + /// @return new UTF-8 encoded string std::string wstr2str_utf8(const std::wstring& ws); + + /// @brief Decode UTF + /// @param s source encoded string + /// @return new raw decoded string std::wstring str2wstr_utf8(const std::string& s); + + /// @brief Calculated length of UTF-8 encoded string that fits into maxSize + /// @param s source UTF-8 encoded string view + /// @param maxSize max encoded string length after crop + /// @return cropped string size (less or equal to maxSize) + size_t crop_utf8(std::string_view s, size_t maxSize); + bool is_integer(const std::string& text); bool is_integer(const std::wstring& text); bool is_valid_filename(const std::wstring& name); diff --git a/test/util/stringutil.cpp b/test/util/stringutil.cpp new file mode 100644 index 00000000..d2172380 --- /dev/null +++ b/test/util/stringutil.cpp @@ -0,0 +1,10 @@ +#include "util/stringutil.hpp" + +#include + +TEST(stringutil, crop_utf8) { + // Project source files must be UTF-8 encoded + std::string str = "пример"; + str = str.substr(0, util::crop_utf8(str, 7)); + EXPECT_EQ(str, "при"); +}