VoxelEngine/src/util/stringutil.cpp
2023-11-16 12:26:58 +03:00

134 lines
3.3 KiB
C++

#include "stringutil.h"
#include <vector>
#include <locale>
#include <sstream>
#include <stdexcept>
using std::vector;
using std::string;
using std::stringstream;
using std::wstring;
using std::wstringstream;
/// Left-pads `s` with the character `c` until it is `length` characters long.
/// A string that already has `length` or more characters is returned unchanged.
wstring util::lfill(wstring s, uint length, wchar_t c) {
    const size_t current = s.length();
    if (current < length) {
        // Put the missing characters in front of the existing text.
        s.insert(s.begin(), length - current, c);
    }
    return s;
}
/// Right-pads `s` with the character `c` until it is `length` characters long.
/// A string that already has `length` or more characters is returned unchanged.
wstring util::rfill(wstring s, uint length, wchar_t c) {
    const size_t current = s.length();
    if (current < length) {
        // Add the missing characters after the existing text.
        s.append(length - current, c);
    }
    return s;
}
/// Encodes the code point `c` as UTF-8 into `bytes` and returns the number of
/// bytes written (1 to 4).
///
/// The value is not validated: anything at or above 0x10000 takes the 4-byte
/// form, and bits above the 21st are discarded by the masks.
/// The caller must provide room for every byte written (up to 4).
uint util::encode_utf8(uint32_t c, ubyte* bytes) {
    // Low six bits of `value`, tagged as a continuation byte (10xxxxxx).
    const auto continuation = [](uint32_t value) -> ubyte {
        return static_cast<ubyte>((value & 0x3F) | 0x80);
    };

    if (c < 0x80) {
        // 0xxxxxxx
        bytes[0] = static_cast<ubyte>(c & 0x7F);
        return 1;
    }
    if (c < 0x0800) {
        // 110xxxxx 10xxxxxx
        bytes[0] = static_cast<ubyte>(((c >> 6) & 0x1F) | 0xC0);
        bytes[1] = continuation(c);
        return 2;
    }
    if (c < 0x010000) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        bytes[0] = static_cast<ubyte>(((c >> 12) & 0x0F) | 0xE0);
        bytes[1] = continuation(c >> 6);
        bytes[2] = continuation(c);
        return 3;
    }
    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    bytes[0] = static_cast<ubyte>(((c >> 18) & 0x07) | 0xF0);
    bytes[1] = continuation(c >> 12);
    bytes[2] = continuation(c >> 6);
    bytes[3] = continuation(c);
    return 4;
}
/* Describes one kind of UTF-8 byte. Instances are aggregate-initialised in
 * field order, so the order of the members below must not change. */
struct utf_t {
/* Bits of the byte that carry payload (code point) data. */
char mask;
/* Fixed high-order bit pattern that accompanies `mask`
 * (e.g. lead 0b11000000 goes with mask 0b00011111). */
char lead;
/* First code point of the range associated with this kind of byte. */
uint32_t beg;
/* Last code point (inclusive) of the range associated with this kind of byte. */
uint32_t end;
/* Number of payload bits stored in a byte of this kind. */
int bits_stored;
};
/* UTF-8 byte kinds. Row 0 describes a continuation byte; rows 1-4 describe the
 * lead byte of a sequence whose total length equals the row index. The final
 * all-zero row (mask == 0) terminates the table. */
const utf_t utf[] = {
    /*  mask                           lead                           beg       end       bits */
    {static_cast<char>(0b00111111), static_cast<char>(0b10000000), 0x0,      0x0,      6},
    {static_cast<char>(0b01111111), static_cast<char>(0b00000000), 0x0,      0x7F,     7},
    {static_cast<char>(0b00011111), static_cast<char>(0b11000000), 0x80,     0x7FF,    5},
    {static_cast<char>(0b00001111), static_cast<char>(0b11100000), 0x800,    0xFFFF,   4},
    {static_cast<char>(0b00000111), static_cast<char>(0b11110000), 0x10000,  0x10FFFF, 3},
    {0, 0, 0, 0, 0},
};
/* Returns the total length in bytes (1..4) of the UTF-8 sequence that starts
 * with the byte `cp`.
 *
 * The byte is classified by its high-order bit pattern: a table row matches
 * when the byte's non-payload bits (everything outside `mask`) equal the row's
 * `lead` pattern. The index of the matching row is the sequence length.
 * Comparing the byte against the rows' code point ranges (`beg`/`end`) would be
 * wrong here: every byte >= 0x80 falls into the 0x80..0x7FF range, so 3- and
 * 4-byte sequences would be reported as 2 bytes long.
 *
 * Throws std::runtime_error if `cp` cannot start a sequence, i.e. it is a
 * continuation byte (10xxxxxx) or matches no row at all (0xF8..0xFF).
 */
inline uint utf8_len(ubyte cp) {
    // Work on plain unsigned values so that a signed `char` in the table
    // cannot sign-extend and break the comparison.
    const unsigned int byte = static_cast<unsigned char>(cp);
    uint len = 0;
    for (const utf_t* u = utf; u->mask; ++u) {
        const unsigned int mask = static_cast<unsigned char>(u->mask);
        const unsigned int lead = static_cast<unsigned char>(u->lead);
        if ((byte & ~mask & 0xFFu) == lead) {
            break;
        }
        ++len;
    }
    // len == 0: row 0 matched, the byte is a continuation byte.
    // len  > 4: the terminator row was reached without a match.
    if (len == 0 || len > 4) {
        throw std::runtime_error("utf-8 decode error");
    }
    return len;
}
/// Decodes a single UTF-8 encoded code point starting at `chr`.
///
/// @param size  out-parameter; receives the number of bytes the sequence
///              occupies, so the caller knows how far to advance
/// @param chr   pointer to the first byte of the sequence; `size` bytes are
///              read from it. Continuation bytes are not validated.
/// @return the decoded code point
/// @throws std::runtime_error if the first byte cannot start a sequence
extern uint32_t util::decode_utf8(uint& size, const char* chr) {
    size = utf8_len(*chr);
    if (size == 0) {
        // A zero length would make the shift below negative (undefined
        // behaviour) and tell the caller to advance by nothing.
        throw std::runtime_error("utf-8 decode error");
    }

    // Every continuation byte contributes this many payload bits.
    const int continuation_bits = utf[0].bits_stored;
    int shift = continuation_bits * static_cast<int>(size - 1);

    // Payload of the lead byte forms the most significant bits.
    uint32_t code = static_cast<uint32_t>(*chr++ & utf[size].mask) << shift;
    for (uint i = 1; i < size; ++i, ++chr) {
        shift -= continuation_bits;
        code |= static_cast<uint32_t>(*chr & utf[0].mask) << shift;
    }
    return code;
}
/// Converts a wide string to a UTF-8 encoded byte string.
/// Each wide character is encoded on its own through encode_utf8.
string util::wstr2str_utf8(const wstring ws) {
    string encoded;
    char unit[4];  // scratch space for one encoded character
    for (size_t i = 0; i < ws.length(); ++i) {
        const uint count = encode_utf8(static_cast<uint>(ws[i]),
                                       reinterpret_cast<ubyte*>(unit));
        encoded.append(unit, count);
    }
    return encoded;
}
/// Converts a UTF-8 encoded byte string to a wide string.
/// The input is decoded one code point at a time through decode_utf8, which
/// also reports how many bytes each code point occupied.
wstring util::str2wstr_utf8(const string s) {
    wstring decoded;
    uint consumed = 0;
    // `consumed` is set by decode_utf8 inside the body before the increment runs.
    for (size_t offset = 0; offset < s.length(); offset += consumed) {
        decoded.push_back(decode_utf8(consumed, &s.at(offset)));
    }
    return decoded;
}
/// Returns true when `text` consists solely of the decimal digits '0'..'9'.
/// Signs, whitespace and any other character make the result false.
/// An empty string contains no offending character and therefore yields true.
bool util::is_integer(string text) {
    return text.find_first_not_of("0123456789") == string::npos;
}