Added XML parser/writer
This commit is contained in:
parent
bfe2e2557b
commit
1f11fa8fea
@ -98,6 +98,18 @@ void BasicParser::skipWhitespace() {
|
||||
}
|
||||
}
|
||||
|
||||
void BasicParser::skip(size_t n) {
|
||||
n = std::min(n, source.length()-pos);
|
||||
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
char next = source[pos++];
|
||||
if (next == '\n') {
|
||||
line++;
|
||||
linestart = pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BasicParser::skipLine() {
|
||||
while (hasNext()) {
|
||||
if (source[pos] == '\n') {
|
||||
@ -110,10 +122,28 @@ void BasicParser::skipLine() {
|
||||
}
|
||||
}
|
||||
|
||||
bool BasicParser::skipTo(const std::string& substring) {
|
||||
size_t idx = source.find(substring, pos);
|
||||
if (idx == std::string::npos) {
|
||||
skip(source.length()-pos);
|
||||
return false;
|
||||
} else {
|
||||
skip(idx-pos);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool BasicParser::hasNext() {
|
||||
return pos < source.length();
|
||||
}
|
||||
|
||||
bool BasicParser::isNext(const std::string& substring) {
|
||||
if (source.length() - pos < substring.length()) {
|
||||
return false;
|
||||
}
|
||||
return source.substr(pos, substring.length()) == substring;
|
||||
}
|
||||
|
||||
char BasicParser::nextChar() {
|
||||
if (!hasNext()) {
|
||||
throw error("unexpected end");
|
||||
@ -129,6 +159,17 @@ void BasicParser::expect(char expected) {
|
||||
pos++;
|
||||
}
|
||||
|
||||
void BasicParser::expect(const std::string& substring) {
|
||||
if (substring.empty())
|
||||
return;
|
||||
for (uint i = 0; i < substring.length(); i++) {
|
||||
if (source.length() <= pos + i || source[pos+i] != substring[i]) {
|
||||
throw error(escape_string(substring)+" expected");
|
||||
}
|
||||
}
|
||||
pos += substring.length();
|
||||
}
|
||||
|
||||
void BasicParser::expectNewLine() {
|
||||
while (hasNext()) {
|
||||
char next = source[pos];
|
||||
@ -145,6 +186,10 @@ void BasicParser::expectNewLine() {
|
||||
}
|
||||
}
|
||||
|
||||
void BasicParser::goBack() {
|
||||
if (pos) pos--;
|
||||
}
|
||||
|
||||
char BasicParser::peek() {
|
||||
skipWhitespace();
|
||||
if (pos >= source.length()) {
|
||||
|
||||
@ -70,12 +70,17 @@ protected:
|
||||
uint linestart = 0;
|
||||
|
||||
virtual void skipWhitespace();
|
||||
void skip(size_t n);
|
||||
void skipLine();
|
||||
bool skipTo(const std::string& substring);
|
||||
void expect(char expected);
|
||||
void expect(const std::string& substring);
|
||||
char peek();
|
||||
char nextChar();
|
||||
bool hasNext();
|
||||
bool isNext(const std::string& substring);
|
||||
void expectNewLine();
|
||||
void goBack();
|
||||
|
||||
std::string parseName();
|
||||
int64_t parseSimpleInt(int base);
|
||||
|
||||
328
src/coders/xml.cpp
Normal file
328
src/coders/xml.cpp
Normal file
@ -0,0 +1,328 @@
|
||||
#include "xml.h"

#include <sstream>
#include <stdexcept>
#include <utility>

#include "../util/stringutil.h"
|
||||
|
||||
using namespace xml;
|
||||
|
||||
/* Create an attribute.
   Both arguments are taken by value and moved into the members, so a
   caller passing temporaries pays no extra copy.
   @param name attribute name
   @param text attribute value as written in the document */
Attribute::Attribute(std::string name, std::string text)
    : name(std::move(name)),
      text(std::move(text)) {
}
|
||||
|
||||
const std::string& Attribute::getName() const {
|
||||
return name;
|
||||
}
|
||||
|
||||
const std::string& Attribute::getText() const {
|
||||
return text;
|
||||
}
|
||||
|
||||
int64_t Attribute::asInt() const {
|
||||
return std::stoll(text);
|
||||
}
|
||||
|
||||
double Attribute::asFloat() const {
|
||||
return std::stod(text);
|
||||
}
|
||||
|
||||
bool Attribute::asBool() const {
|
||||
return text == "true" || text == "1";
|
||||
}
|
||||
|
||||
/* Create an element with the given tag and no attributes or children.
   The tag is taken by value and moved into the member.
   @param tag element tag ("#" marks a text element, see isText()) */
Node::Node(std::string tag) : tag(std::move(tag)) {
}
|
||||
|
||||
/* Append a sub-element.
   The shared pointer is moved into the child list, which avoids one
   extra reference-count increment/decrement pair.
   @param element child to append */
void Node::add(xmlelement element) {
    elements.push_back(std::move(element));
}
|
||||
|
||||
void Node::set(std::string name, std::string text) {
|
||||
attrs.insert_or_assign(name, Attribute(name, text));
|
||||
}
|
||||
|
||||
const std::string& Node::getTag() const {
|
||||
return tag;
|
||||
}
|
||||
|
||||
/* Get an attribute by name.
   @param name attribute name
   @throws std::runtime_error if the element has no such attribute
   @return copy of the stored attribute */
const xmlattribute Node::attr(const std::string& name) const {
    const auto entry = attrs.find(name);
    if (entry != attrs.end()) {
        return entry->second;
    }
    throw std::runtime_error("element <"+tag+" ...> missing attribute "+name);
}
|
||||
|
||||
/* Get an attribute by name with a fallback value.
   @param name attribute name
   @param def value wrapped into a new attribute when name is not set
   @return copy of the stored attribute, or {name, def} if not found */
const xmlattribute Node::attr(const std::string& name, const std::string& def) const {
    const auto entry = attrs.find(name);
    const bool missing = entry == attrs.end();
    return missing ? Attribute(name, def) : entry->second;
}
|
||||
|
||||
bool Node::has(const std::string& name) const {
|
||||
auto found = attrs.find(name);
|
||||
return found != attrs.end();
|
||||
}
|
||||
|
||||
/* Get a sub-element by index.
   @param index position in the child list
   @throws std::out_of_range if index is not a valid position
   @return shared pointer to the child */
xmlelement Node::sub(size_t index) {
    const xmlelement& child = elements.at(index);
    return child;
}
|
||||
|
||||
size_t Node::size() const {
|
||||
return elements.size();
|
||||
}
|
||||
|
||||
const std::vector<xmlelement>& Node::getElements() const {
|
||||
return elements;
|
||||
}
|
||||
|
||||
/* Get read-only access to the attribute map (attribute name -> attribute). */
const xmlelements_map& Node::getAttributes() const {
    return this->attrs;
}
|
||||
|
||||
/* Create a document without a root element.
   Both arguments are taken by value and moved into the members.
   @param version value of the declaration's version attribute
   @param encoding value of the declaration's encoding attribute */
Document::Document(std::string version, std::string encoding)
    : version(std::move(version)),
      encoding(std::move(encoding)) {
}
|
||||
|
||||
/* Replace the root element of the document.
   The shared pointer is moved into the member instead of copied.
   @param element new root (may be nullptr) */
void Document::setRoot(xmlelement element) {
    this->root = std::move(element);
}
|
||||
|
||||
/* Get the root element.
   @return shared pointer to the root; nullptr if no root has been set */
xmlelement Document::getRoot() const {
    return this->root;
}
|
||||
|
||||
const std::string& Document::getVersion() const {
|
||||
return version;
|
||||
}
|
||||
|
||||
const std::string& Document::getEncoding() const {
|
||||
return encoding;
|
||||
}
|
||||
|
||||
/* Create an XML parser over the given source.
   Both arguments are forwarded unchanged to BasicParser.
   @param filename file name (shown in error messages, per xml::parse docs)
   @param source XML source code */
Parser::Parser(std::string filename, std::string source)
    : BasicParser(filename, source)
{}
|
||||
|
||||
/* Parse a tag name followed by its attributes.
   The cursor must be right after '<' (or "<?"). Parsing stops in front
   of the first '/', '>' or '?' character, which is left for the caller.
   An attribute without "=value" gets an empty text. Values may be
   enclosed in double or single quotes.
   @return new element holding the tag and the parsed attributes */
xmlelement Parser::parseOpenTag() {
    std::string tag = parseName();
    auto node = std::make_shared<Node>(tag);

    while (true) {
        skipWhitespace();
        const char c = peek();
        if (c == '/' || c == '>' || c == '?')
            break;
        std::string attrname = parseName();
        std::string attrtext = "";
        skipWhitespace();
        if (peek() == '=') {
            nextChar();
            skipWhitespace();
            // XML allows either quote character around an attribute value;
            // anything else keeps the original '"' expectation and error.
            char quote = '"';
            if (peek() == '\'') {
                quote = '\'';
                nextChar();
            } else {
                expect('"');
            }
            // presumably reads up to the matching closing quote -
            // parseString is defined outside this file, confirm
            attrtext = parseString(quote);
        }
        node->set(attrname, attrtext);
    }
    return node;
}
|
||||
|
||||
void Parser::parseDeclaration() {
|
||||
std::string version = "1.0";
|
||||
std::string encoding = "UTF-8";
|
||||
expect('<');
|
||||
if (peek() == '?') {
|
||||
nextChar();
|
||||
xmlelement node = parseOpenTag();
|
||||
expect("?>");
|
||||
if (node->getTag() != "xml") {
|
||||
throw error("invalid declaration");
|
||||
}
|
||||
version = node->attr("version", version).getText();
|
||||
encoding = node->attr("encoding", encoding).getText();
|
||||
if (encoding != "utf-8" && encoding != "UTF-8") {
|
||||
throw error("UTF-8 encoding is only supported");
|
||||
}
|
||||
} else {
|
||||
goBack();
|
||||
}
|
||||
document = std::make_shared<Document>(version, encoding);
|
||||
}
|
||||
|
||||
void Parser::parseComment() {
|
||||
expect("!--");
|
||||
if (skipTo("-->")) {
|
||||
skip(3);
|
||||
} else {
|
||||
throw error("comment close missing");
|
||||
}
|
||||
}
|
||||
|
||||
std::string Parser::parseText() {
|
||||
size_t start = pos;
|
||||
while (hasNext()) {
|
||||
char c = peek();
|
||||
if (c == '<') {
|
||||
break;
|
||||
}
|
||||
nextChar();
|
||||
}
|
||||
return source.substr(start, pos-start);
|
||||
}
|
||||
|
||||
/* Parse one element at the cursor.
   Three forms are recognized:
   - character data: returned as a text element with tag "#" whose
     decoded content is stored in the attribute "#";
   - a comment: skipped, nullptr is returned;
   - <tag .../> or <tag ...>children</tag>: returned with its attributes
     and, recursively, its sub-elements.
   @throws parser error on "<!DOCTYPE ", on a mismatched closing tag and
   on any character other than '/' or '>' after the attributes
   @return parsed element, or nullptr for a comment */
xmlelement Parser::parseElement() {
    // text element
    if (peek() != '<') {
        auto element = std::make_shared<Node>("#");
        auto text = parseText();
        // Decode the predefined entities. "&amp;" goes last so that an
        // escaped entity such as "&amp;lt;" yields the literal "&lt;".
        util::replaceAll(text, "&quot;", "\"");
        util::replaceAll(text, "&apos;", "'");
        util::replaceAll(text, "&lt;", "<");
        util::replaceAll(text, "&gt;", ">");
        util::replaceAll(text, "&amp;", "&");
        element->set("#", text);
        return element;
    }
    nextChar();

    // <!--element-->
    if (peek() == '!') {
        if (isNext("!DOCTYPE ")) {
            throw error("XML DTD is not supported yet");
        }
        parseComment();
        return nullptr;
    }

    auto element = parseOpenTag();
    char c = nextChar();

    // <element/>
    if (c == '/') {
        expect('>');
    }
    // <element>...</element>
    else if (c == '>') {
        skipWhitespace();
        while (!isNext("</")) {
            auto sub = parseElement();
            // comments come back as nullptr and are not stored
            if (sub) {
                element->add(sub);
            }
            skipWhitespace();
        }
        // step over "</", then require the same tag name and '>'
        skip(2);
        expect(element->getTag());
        expect('>');
    }
    // <element?>
    else {
        throw error("invalid syntax");
    }
    return element;
}
|
||||
|
||||
/* Parse the whole source into a document.
   Reads the optional declaration, then the root element. parseElement()
   returns nullptr for a comment, so comments in front of the root are
   skipped instead of being stored as a null root.
   @throws parser error if the source ends before any element is found
   @return the parsed document */
xmldocument Parser::parse() {
    parseDeclaration();

    xmlelement root = nullptr;
    while (root == nullptr) {
        skipWhitespace();
        if (!hasNext()) {
            throw error("root element is missing");
        }
        root = parseElement();
    }
    document->setRoot(root);
    return document;
}
|
||||
|
||||
/* Read an XML document from a string.
   The by-value arguments are moved into the parser rather than copied.
   @param filename file name used by the parser for error messages
   @param source XML source code
   @return the parsed document */
xmldocument xml::parse(std::string filename, std::string source) {
    Parser parser(std::move(filename), std::move(source));
    return parser.parse();
}
|
||||
|
||||
/* Write a line break followed by indentation, in "nice" mode only.
   @param ss output stream
   @param nice when false nothing is written
   @param indentStr text of one indentation level
   @param indent number of levels to write; values <= 0 write none */
inline void newline(
    std::stringstream& ss,
    bool nice,
    const std::string& indentStr,
    int indent
) {
    if (nice) {
        ss << '\n';
        int remaining = indent;
        while (remaining-- > 0) {
            ss << indentStr;
        }
    }
}
|
||||
|
||||
static void stringifyElement(
|
||||
std::stringstream& ss,
|
||||
const xmlelement element,
|
||||
bool nice,
|
||||
const std::string& indentStr,
|
||||
int indent
|
||||
) {
|
||||
if (element->isText()) {
|
||||
std::string text = element->attr("#").getText();
|
||||
util::replaceAll(text, "&", "&");
|
||||
util::replaceAll(text, "\"",""");
|
||||
util::replaceAll(text, "'", "'");
|
||||
util::replaceAll(text, "<", "<");
|
||||
util::replaceAll(text, ">", ">");
|
||||
ss << text;
|
||||
return;
|
||||
}
|
||||
const std::string& tag = element->getTag();
|
||||
|
||||
ss << '<' << tag;
|
||||
auto& attrs = element->getAttributes();
|
||||
if (!attrs.empty()) {
|
||||
ss << ' ';
|
||||
int count = 0;
|
||||
for (auto& entry : attrs) {
|
||||
auto attr = entry.second;
|
||||
ss << attr.getName();
|
||||
if (!attr.getText().empty()) {
|
||||
ss << "=" << escape_string(attr.getText());
|
||||
}
|
||||
if (count + 1 < int(attrs.size())) {
|
||||
ss << " ";
|
||||
}
|
||||
count++;
|
||||
}
|
||||
}
|
||||
auto& elements = element->getElements();
|
||||
if (elements.size() == 1 && elements[0]->isText()) {
|
||||
ss << ">";
|
||||
stringifyElement(ss, elements[0], nice, indentStr, indent+1);
|
||||
ss << "</" << tag << ">";
|
||||
return;
|
||||
}
|
||||
if (!elements.empty()) {
|
||||
ss << '>';
|
||||
for (auto& sub : elements) {
|
||||
newline(ss, nice, indentStr, indent+1);
|
||||
stringifyElement(ss, sub, nice, indentStr, indent+1);
|
||||
}
|
||||
newline(ss, nice, indentStr, indent);
|
||||
ss << "</" << tag << ">";
|
||||
|
||||
} else {
|
||||
ss << "/>";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::string xml::stringify(
|
||||
const xmldocument document,
|
||||
bool nice,
|
||||
const std::string& indentStr
|
||||
) {
|
||||
std::stringstream ss;
|
||||
|
||||
// XML declaration
|
||||
ss << "<?xml version=\"" << document->getVersion();
|
||||
ss << "\" encoding=\"UTF-8\" ?>";
|
||||
newline(ss, nice, indentStr, 0);
|
||||
|
||||
stringifyElement(ss, document->getRoot(), nice, indentStr, 0);
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
132
src/coders/xml.h
Normal file
132
src/coders/xml.h
Normal file
@ -0,0 +1,132 @@
|
||||
#ifndef CODERS_XML_H_
|
||||
#define CODERS_XML_H_
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "commons.h"
|
||||
|
||||
namespace xml {
|
||||
class Node;
|
||||
class Attribute;
|
||||
class Document;
|
||||
|
||||
typedef Attribute xmlattribute;
|
||||
typedef std::shared_ptr<Node> xmlelement;
|
||||
typedef std::shared_ptr<Document> xmldocument;
|
||||
typedef std::unordered_map<std::string, xmlattribute> xmlelements_map;
|
||||
|
||||
/* XML attribute: a {name, text} pair with typed accessors for the text */
class Attribute {
private:
    std::string name;
    std::string text;

public:
    /* @param name attribute name
       @param text attribute value */
    Attribute(std::string name, std::string text);

    /* Get attribute name */
    const std::string& getName() const;

    /* Get attribute value as a string */
    const std::string& getText() const;

    /* Get attribute value converted to an integer */
    int64_t asInt() const;

    /* Get attribute value converted to a floating point number */
    double asFloat() const;

    /* Get attribute value converted to a boolean */
    bool asBool() const;
};
|
||||
|
||||
/* XML element class. Text element has tag 'text' and attribute 'text' */
|
||||
class Node {
|
||||
std::string tag;
|
||||
std::unordered_map<std::string, xmlattribute> attrs;
|
||||
std::vector<xmlelement> elements;
|
||||
public:
|
||||
Node(std::string tag);
|
||||
|
||||
/* Add sub-element */
|
||||
void add(xmlelement element);
|
||||
|
||||
/* Set attribute value. Creates attribute if does not exists */
|
||||
void set(std::string name, std::string text);
|
||||
|
||||
/* Get element tag */
|
||||
const std::string& getTag() const;
|
||||
|
||||
inline bool isText() const {
|
||||
return getTag() == "#";
|
||||
}
|
||||
|
||||
inline const std::string& text() const {
|
||||
return attr("#").getText();
|
||||
}
|
||||
|
||||
/* Get attribute by name
|
||||
@param name attribute name
|
||||
@throws std::runtime_error if element has no attribute
|
||||
@return xmlattribute - {name, value} */
|
||||
const xmlattribute attr(const std::string& name) const;
|
||||
/* Get attribute by name
|
||||
@param name name
|
||||
@param def default value will be returned wrapped in xmlattribute
|
||||
if element has no attribute
|
||||
@return xmlattribute - {name, value} or {name, def} if not found*/
|
||||
const xmlattribute attr(const std::string& name, const std::string& def) const;
|
||||
|
||||
/* Check if element has attribute
|
||||
@param name attribute name */
|
||||
bool has(const std::string& name) const;
|
||||
|
||||
/* Get sub-element by index
|
||||
@throws std::out_of_range if an invalid index given */
|
||||
xmlelement sub(size_t index);
|
||||
|
||||
/* Get number of sub-elements */
|
||||
size_t size() const;
|
||||
|
||||
const std::vector<xmlelement>& getElements() const;
|
||||
const xmlelements_map& getAttributes() const;
|
||||
};
|
||||
|
||||
class Document {
|
||||
xmlelement root = nullptr;
|
||||
std::string version;
|
||||
std::string encoding;
|
||||
public:
|
||||
Document(std::string version, std::string encoding);
|
||||
|
||||
void setRoot(xmlelement element);
|
||||
xmlelement getRoot() const;
|
||||
|
||||
const std::string& getVersion() const;
|
||||
const std::string& getEncoding() const;
|
||||
};
|
||||
|
||||
class Parser : public BasicParser {
|
||||
xmldocument document;
|
||||
|
||||
xmlelement parseOpenTag();
|
||||
xmlelement parseElement();
|
||||
void parseDeclaration();
|
||||
void parseComment();
|
||||
std::string parseText();
|
||||
public:
|
||||
Parser(std::string filename, std::string source);
|
||||
|
||||
xmldocument parse();
|
||||
};
|
||||
|
||||
/* Serialize XML Document to string
|
||||
@param document serializing document
|
||||
@param nice use human readable format
|
||||
(with indents and line-separators)
|
||||
@param indentStr indentation characters sequence
|
||||
(default - 4 spaces)*/
|
||||
extern std::string stringify(
|
||||
const xmldocument document,
|
||||
bool nice=true,
|
||||
const std::string& indentStr=" "
|
||||
);
|
||||
|
||||
/* Read XML Document from string
|
||||
@param filename file name will be shown in error messages
|
||||
@param source xml source code string */
|
||||
extern xmldocument parse(std::string filename, std::string source);
|
||||
}
|
||||
|
||||
#endif // CODERS_XML_H_
|
||||
@ -252,3 +252,18 @@ std::vector<ubyte> util::base64_decode(const char* str, size_t size) {
|
||||
std::vector<ubyte> util::base64_decode(const std::string& str) {
|
||||
return base64_decode(str.c_str(), str.size());
|
||||
}
|
||||
|
||||
/* Replace every occurrence of a substring in place.
   The search continues after each inserted replacement, so text
   produced by a replacement is never matched again.
   @param str string to modify
   @param from substring to search for; an empty value matches nothing
   @param to replacement text
   @return number of replacements made */
int util::replaceAll(std::string& str, const std::string& from, const std::string& to) {
    // an empty pattern is found at every offset and would never terminate
    if (from.empty()) {
        return 0;
    }
    int count = 0;
    size_t offset = 0;
    while (true) {
        const size_t start_pos = str.find(from, offset);
        if (start_pos == std::string::npos)
            break;
        str.replace(start_pos, from.length(), to);
        // resume right after the inserted text
        offset = start_pos + to.length();
        count++;
    }
    return count;
}
|
||||
|
||||
@ -26,6 +26,8 @@ namespace util {
|
||||
extern std::string base64_encode(const ubyte* data, size_t size);
|
||||
extern std::vector<ubyte> base64_decode(const char* str, size_t size);
|
||||
extern std::vector<ubyte> base64_decode(const std::string& str);
|
||||
|
||||
extern int replaceAll(std::string& str, const std::string& from, const std::string& to);
|
||||
}
|
||||
|
||||
#endif // UTIL_STRINGUTIL_H_
|
||||
Loading…
x
Reference in New Issue
Block a user