134 lines
3.3 KiB
C++
134 lines
3.3 KiB
C++
#include "stringutil.h"
|
|
|
|
#include <vector>
|
|
#include <locale>
|
|
#include <sstream>
|
|
#include <stdexcept>
|
|
|
|
using std::vector;
|
|
using std::string;
|
|
using std::stringstream;
|
|
using std::wstring;
|
|
using std::wstringstream;
|
|
|
|
/**
 * Left-pad a wide string to a minimum width.
 *
 * @param s      text to pad
 * @param length desired minimum number of characters in the result
 * @param c      fill character placed in front of `s`
 * @return `s` unchanged when it already holds at least `length` characters,
 *         otherwise `s` preceded by as many copies of `c` as are needed to
 *         reach exactly `length` characters
 */
wstring util::lfill(wstring s, uint length, wchar_t c) {
    const size_t current = s.length();
    if (current >= length) {
        return s;
    }
    // Build the run of fill characters first, then attach the original text.
    wstring padded(length - current, c);
    padded.append(s);
    return padded;
}
|
|
|
|
/**
 * Right-pad a wide string to a minimum width.
 *
 * @param s      text to pad
 * @param length desired minimum number of characters in the result
 * @param c      fill character appended after `s`
 * @return `s` unchanged when it already holds at least `length` characters,
 *         otherwise `s` followed by as many copies of `c` as are needed to
 *         reach exactly `length` characters
 */
wstring util::rfill(wstring s, uint length, wchar_t c) {
    const size_t current = s.length();
    if (current < length) {
        // `s` is our own copy, so it can be extended in place.
        s.append(length - current, c);
    }
    return s;
}
|
|
|
|
/**
 * Encode one code point as a UTF-8 byte sequence.
 *
 * @param c     code point to encode
 * @param bytes output buffer; up to four bytes are written, so it must have
 *              room for at least four
 * @return number of bytes written (1 to 4)
 *
 * No validation is performed: values of 0x10000 and above always take the
 * four-byte form, and only the low 21 bits of `c` are stored in that case.
 */
uint util::encode_utf8(uint32_t c, ubyte* bytes) {
    // Continuation byte (10xxxxxx) holding the six payload bits found at `shift`.
    const auto trail = [c](uint shift) -> ubyte {
        return static_cast<ubyte>(((c >> shift) & 0x3F) | 0x80);
    };

    if (c < 0x80) {
        // 0xxxxxxx
        bytes[0] = static_cast<ubyte>(c & 0x7F);
        return 1;
    }
    if (c < 0x0800) {
        // 110xxxxx 10xxxxxx
        bytes[0] = static_cast<ubyte>(((c >> 6) & 0x1F) | 0xC0);
        bytes[1] = trail(0);
        return 2;
    }
    if (c < 0x010000) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        bytes[0] = static_cast<ubyte>(((c >> 12) & 0x0F) | 0xE0);
        bytes[1] = trail(6);
        bytes[2] = trail(0);
        return 3;
    }
    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    bytes[0] = static_cast<ubyte>(((c >> 18) & 0x07) | 0xF0);
    bytes[1] = trail(12);
    bytes[2] = trail(6);
    bytes[3] = trail(0);
    return 4;
}
|
|
|
|
/**
 * One row of the UTF-8 layout table below.
 */
struct utf_t {
    /* Bits of a byte that carry code point payload; decode_utf8 ANDs bytes
       with this. A zero mask marks the end of the table. */
    char mask;
    /* Fixed marker bits that sit above the payload bits of such a byte. */
    char lead;
    /* First code point covered by this row (inclusive). */
    uint32_t beg;
    /* Last code point covered by this row (inclusive). */
    uint32_t end;
    /* Number of payload bits held by a byte of this kind. */
    int bits_stored;
};

/**
 * UTF-8 layout table.
 *
 * Row 0 describes continuation bytes. Rows 1..4 describe the first byte of
 * a sequence of that many bytes, so the row index doubles as the sequence
 * length. The final all-zero row is a sentinel for loops that walk the table
 * until `mask` is zero.
 */
const utf_t utf[] = {
    /* mask                     lead                     beg       end       bits */
    {static_cast<char>(0x3F), static_cast<char>(0x80), 0x000000, 0x000000, 6}, /* continuation */
    {static_cast<char>(0x7F), static_cast<char>(0x00), 0x000000, 0x00007F, 7}, /* 1 byte  */
    {static_cast<char>(0x1F), static_cast<char>(0xC0), 0x000080, 0x0007FF, 5}, /* 2 bytes */
    {static_cast<char>(0x0F), static_cast<char>(0xE0), 0x000800, 0x00FFFF, 4}, /* 3 bytes */
    {static_cast<char>(0x07), static_cast<char>(0xF0), 0x010000, 0x10FFFF, 3}, /* 4 bytes */
    {0, 0, 0, 0, 0},                                                           /* sentinel */
};
|
|
|
|
|
|
/**
 * Length in bytes of the UTF-8 sequence that starts with the given byte.
 *
 * The length is read from the high-bit pattern of the first byte:
 *   0xxxxxxx -> 1, 110xxxxx -> 2, 1110xxxx -> 3, 11110xxx -> 4.
 * The argument is a single byte, so it has to be classified by these marker
 * bits; comparing it against code point ranges cannot tell 2-, 3- and 4-byte
 * sequences apart and would report a length of 0 for a NUL byte.
 *
 * @param cp first byte of an encoded sequence
 * @return sequence length, always in the range 1..4
 * @throws std::runtime_error when `cp` cannot start a sequence, i.e. it is a
 *         continuation byte (10xxxxxx) or has the form 11111xxx
 */
inline uint utf8_len(ubyte cp) {
    if ((cp & 0x80) == 0x00) {
        return 1;
    }
    if ((cp & 0xE0) == 0xC0) {
        return 2;
    }
    if ((cp & 0xF0) == 0xE0) {
        return 3;
    }
    if ((cp & 0xF8) == 0xF0) {
        return 4;
    }
    /* Not a valid first byte. */
    throw std::runtime_error("utf-8 decode error");
}
|
|
|
|
extern uint32_t util::decode_utf8(uint& size, const char* chr) {
|
|
size = utf8_len(*chr);
|
|
int shift = utf[0].bits_stored * (size - 1);
|
|
uint32_t code = (*chr++ & utf[size].mask) << shift;
|
|
|
|
for(uint i = 1; i < size; ++i, ++chr) {
|
|
shift -= utf[0].bits_stored;
|
|
code |= ((char)*chr & utf[0].mask) << shift;
|
|
}
|
|
return code;
|
|
}
|
|
|
|
string util::wstr2str_utf8(const wstring ws) {
|
|
vector<char> chars;
|
|
char buffer[4];
|
|
for (wchar_t wc : ws) {
|
|
uint size = encode_utf8((uint)wc, (ubyte*)buffer);
|
|
for (uint i = 0; i < size; i++) {
|
|
chars.push_back(buffer[i]);
|
|
}
|
|
}
|
|
return string(chars.data(), chars.size());
|
|
}
|
|
|
|
wstring util::str2wstr_utf8(const string s) {
|
|
vector<wchar_t> chars;
|
|
size_t pos = 0;
|
|
uint size = 0;
|
|
while (pos < s.length()) {
|
|
chars.push_back(decode_utf8(size, &s.at(pos)));
|
|
pos += size;
|
|
}
|
|
return wstring(chars.data(), chars.size());
|
|
}
|
|
|
|
/**
 * Check whether a string is a non-empty run of ASCII decimal digits.
 *
 * No sign, whitespace or any other character is accepted. An empty string
 * holds no digits and is therefore not an integer.
 *
 * @param text string to inspect
 * @return true when `text` has at least one character and every character
 *         is in the range '0'..'9'
 */
bool util::is_integer(string text) {
    if (text.empty()) {
        return false;
    }
    for (char c : text) {
        if (c < '0' || c > '9') {
            return false;
        }
    }
    return true;
}