A
Hallo!
Im Zuge der Ferien baue ich gerade einen rudimentären XML-Interpreter. Nun habe ich einen kleinen Parser, Lexer und eine Vorform eines DOM-Baumes fertiggestellt, der XML-Syntax der Form
<root>
<node>data</node>
<node>
<more>data</more>
</node>
</root>
interpretieren kann (also erst mal ohne Attribute). Ich würde mich über Kritik oder Bugs freuen, damit ich das Programm robust machen kann.
Ein Beispielprogramm:
#include <cctype>
#include <fstream>
#include <iostream>
#include <istream>
#include <map>
#include <memory>
#include <sstream>
#include <stack>
#include <stdexcept>
#include <string>
#include <vector>
namespace xml {
// A single lexer token: either an element tag (a name plus an open/close
// flag) or a run of text. A token counts as an element tag exactly when its
// name is non-empty; text tokens carry their content in value().
class tag {
public:
    // Builds an element-tag token. `open` is true for an opening tag and
    // false for a closing tag.
    tag(const std::string& str, bool open)
        : tagname_(str), value_(), open_(open) { }

    // Builds a text token holding `info`; its open flag is always false.
    tag(const std::string& info)
        : tagname_(), value_(info), open_(false)
    {
    }

    // Copying is left to the compiler: a member-wise copy always initializes
    // open_, whereas a selective hand-written copy would leave it
    // indeterminate for text tokens.

    // True when this token is an element tag rather than text.
    bool is_tag() const { return !tagname_.empty(); }
    // Element name; empty for text tokens.
    const std::string& name() const { return tagname_; }
    // Text content; empty for element tags.
    const std::string& value() const { return value_; }
    // True for an opening element tag.
    bool open() const { return open_; }

private:
    std::string tagname_, value_;
    bool open_;
};
// Strict weak ordering for tokens based solely on the element name, so all
// tokens with the same name (and all text tokens, whose name is empty)
// compare equivalent.
bool operator <(const tag& t1, const tag& t2) {
    const std::string& left = t1.name();
    const std::string& right = t2.name();
    return left.compare(right) < 0;
}
// Tokenizer for a minimal XML dialect: elements and text only, no attributes.
//
// The constructor consumes the whole stream and stores the resulting tokens,
// which are then available through tags(). Only alphanumeric characters are
// kept in tag names and text; runs of blanks in text collapse to one space,
// and other characters in text are dropped. A '/' anywhere inside a tag marks
// it as a closing tag.
class lexer {
public:
    // Thrown for malformed input; carries the 1-based line and the column
    // counter value at which the problem was detected.
    class format_error : public std::runtime_error {
    public:
        format_error(const char* message, unsigned line, unsigned column)
            : std::runtime_error(message), line_(line), column_(column)
        { }
        unsigned line() const { return line_; }
        unsigned column() const { return column_; }
    private:
        unsigned line_, column_;
    };

    // Reads `fs` until it is exhausted and builds the token list.
    // Accepts any input stream (a std::fstream binds to this parameter).
    // Throws format_error on whitespace or invalid characters inside a tag,
    // on an invalid first tag character, on an empty tag name, on '<' inside
    // a tag, and when the input ends in the middle of a tag.
    lexer(std::istream& fs) {
        typedef std::istream::traits_type traits;

        bool in_tag = false;        // currently between '<' and '>'
        bool open = false;          // the tag being read is an opening tag
        bool single_space = true;   // the next blank may still be appended to the text
        unsigned line_counter = 1, column_counter = 0;
        std::string tagname, content;

        for (;;) {
            // Work on the int result so that end-of-file (or a failed read)
            // is detected before the value is treated as a character.
            const traits::int_type raw = fs.get();
            if (traits::eq_int_type(raw, traits::eof()))
                break;
            const char c = traits::to_char_type(raw);
            ++column_counter;

            switch (c) {
            case ' ':
            case '\n':
            case '\t':
            case '\r':   // the document is read in binary mode, so CR may show up
                if (in_tag)
                    throw format_error("unexpected whitespace in tag name found", line_counter, column_counter);
                if (c == '\n') {
                    ++line_counter;
                    column_counter = 0;
                }
                if (c == ' ' && single_space) {
                    content.push_back(c);
                    single_space = false;
                }
                continue;

            case '<': {
                if (in_tag)
                    throw format_error("invalid tag name", line_counter, column_counter);
                in_tag = true;   // reading of the tag name begins
                open = true;
                if (!content.empty()) {
                    // Text collected so far becomes its own token.
                    tags_.push_back(tag(content));
                    content.clear();
                }
                // Look at the next character without consuming it: a tag must
                // start with a letter or with '/' (closing tag).
                const traits::int_type ahead = fs.peek();
                const bool valid_start =
                    !traits::eq_int_type(ahead, traits::eof()) &&
                    (traits::to_char_type(ahead) == '/' ||
                     std::isalpha(static_cast<unsigned char>(traits::to_char_type(ahead))));
                if (!valid_start) {
                    ++column_counter;
                    throw format_error("invalid tag name beginning", line_counter, column_counter);
                }
                continue;
            }

            case '>':
                if (!in_tag) {
                    // A stray '>' in text is dropped like any other
                    // non-alphanumeric character instead of producing an
                    // empty token.
                    single_space = true;
                    continue;
                }
                if (tagname.empty())
                    throw format_error("invalid tag name", line_counter, column_counter);
                in_tag = false;  // reading of the tag name ends
                tags_.push_back(tag(tagname, open));
                tagname.clear();
                continue;

            case '/':
                if (in_tag)
                    open = false;  // closing tag
                continue;

            default:
                single_space = true;
                break;
            }

            // <cctype> classifiers require a value representable as unsigned char.
            if (std::isalnum(static_cast<unsigned char>(c))) {
                if (in_tag)
                    tagname.push_back(c);
                else
                    content.push_back(c);
            }
            else if (in_tag) {
                throw format_error("invalid tag name", line_counter, column_counter);
            }
        }

        // Input ended while a tag was still open (e.g. "<node").
        if (in_tag)
            throw format_error("invalid tag name", line_counter, column_counter + 1);
    }

    // Tokens in document order.
    const std::vector<tag>& tags() const { return tags_; }

private:
    std::vector<tag> tags_;
};
class node {
public:
class element_selecter {
public:
template <typename In>
element_selecter(In begin, In end)
: elements_(std::distance(begin, end))
{
for(unsigned n = 0; n < elements_.size(); ++n) {
elements_[n] = begin->second;
++begin;
}
}
node& operator [](unsigned index) const {
return *elements_.at(index);
}
private:
std::vector<node*> elements_;
};
node(const tag& tag, node* parent)
: name_(tag), data_(), children_(), parent_(parent)
{ }
~node() {
std::multimap<tag, node*>::iterator end = children_.end();
for(auto iter = children_.begin(); iter != end; ++iter) {
delete iter->second;
}
}
void add_data(const tag& t) {
data_.push_back(t);
}
void add_sibling(node* n) {
children_.insert(std::make_pair(n->get_tag(), n));
}
const tag& get_tag() const { return name_; }
node* get_parent() { return parent_; }
std::pair<std::multimap<tag, node*>::iterator, std::multimap<tag, node*>::iterator> find(const std::string& tagname) {
return children_.equal_range(tag(tagname, true));
}
const std::string& data() {
if(!children_.empty() || data_.empty()) {
std::stringstream ss;
ss << "DOM error: no text value for <" << name_.name() << "> available";
throw std::exception(ss.str().c_str());
}
return data_[0].value();
}
element_selecter operator [](const std::string& tagname) {
auto iter_pair = find(tagname);
if(iter_pair.first == iter_pair.second || iter_pair.first == children_.end()) {
std::stringstream ss;
ss << "error: \"tag <" << tagname << "> not found\"";
throw std::exception(ss.str().c_str());
}
return element_selecter(iter_pair.first, iter_pair.second);
}
const tag& operator ->() {
return name_;
}
private:
tag name_;
std::vector<tag> data_;
std::multimap<tag, node*> children_;
node* parent_;
};
class parser {
public:
parser(const std::vector<tag>& tags)
: tags_(tags)
{
// Syntax-Check
std::stack<const tag*> tag_stack;
for(std::size_t n = 0; n < tags.size(); ++n) {
if(tags[n].is_tag()) {
if(tags[n].open()) {
tag_stack.push(&tags[n]);
}
else {
if(tag_stack.empty()) {
std::stringstream ss;
ss << "tag mismatch: unexpected closing tag </" << tags[n].name() << "> found";
throw parse_error(ss.str().c_str());
}
if(tag_stack.top()->name() != tags[n].name()) {
std::stringstream ss;
ss << "tag mismatch: </" << tag_stack.top()->name() << "> expected, but </" << tags[n].name() << "> found";
throw parse_error(ss.str().c_str());
}
tag_stack.pop();
}
}
}
if(!tag_stack.empty()) {
std::stringstream ss;
ss << "closing tag </" << tag_stack.top()->name() << "> expected, but not found";
throw parse_error(ss.str().c_str());
}
}
node* generate() {
node* root = new node(tags_[0], 0);
node* current = root;
for(unsigned n = 1; n < tags_.size(); ++n) {
if(tags_[n].is_tag()) { // nested tag found
if(tags_[n].open()) { // if this tag is open, it's a new one
node* next = new node(tags_[n], current);
current->add_sibling(next);
current = next;
}
else { // current tag must have been closed (parser checks this in constructor)
current = current->get_parent(); // back to parent tag
}
}
else { // it's data
current->add_data(tags_[n]);
}
}
return root;
}
class parse_error : public std::exception {
public:
parse_error(const char* message)
: std::exception(message)
{ }
};
private:
const std::vector<tag>& tags_;
};
class document {
public:
document() { }
document(const std::string& filename)
: file_(filename.c_str(), std::ios_base::binary | std::ios_base::in | std::ios_base::out), root_(0)
{
if(!file_)
throw std::ios_base::failure("can't open XML file");
lexer lx(file_);
parser prs(lx.tags());
root_ = prs.generate();
}
void open(const std::string& filename) {
file_.open(filename.c_str(), std::ios_base::binary);
}
void close() {
file_.close();
}
friend std::ostream& operator << (std::ostream&, document&);
~document() {
delete root_;
}
node::element_selecter operator [](const std::string& index) {
auto iter_pair = root_->find(index);
if(iter_pair.first == iter_pair.second) {
std::stringstream ss;
ss << "error: \"tag <" << index << "> not found\"";
throw std::exception(ss.str().c_str());
}
return node::element_selecter(iter_pair.first, iter_pair.second);
}
private:
std::fstream file_;
node* root_;
};
// Writes the raw content of the document's file to `os`, line by line, each
// line followed by '\n'.
//
// The file stream has normally been read to its end already, so its state is
// reset and it is rewound first; otherwise nothing would be printed. The
// error state of `os` is left untouched so that write failures stay visible
// to the caller.
std::ostream& operator << (std::ostream& os, document& doc) {
    doc.file_.clear();
    doc.file_.seekg(0, std::ios_base::beg);

    std::string line;
    while(std::getline(doc.file_, line)) {
        os << line << '\n';
    }

    // Reaching the end of the file sets eofbit/failbit on the file stream;
    // clear them so the document can be printed again.
    doc.file_.clear();
    return os;
}
}
int main() {
try {
xml::document doc("data.xml");
std::string name;
std::cout << "Bitte geben Sie den Namen des Kindes ein: ";
std::cin >> name;
std::cout << name << " ist " << doc["kinder"][0][name][0]["alter"][0].data() << ".";
}
catch(std::ios_base::failure& e) {
std::cerr << e.what() << '\n';
}
catch(xml::lexer::format_error& e) {
std::cerr << "format error: \"" << e.what() << "\" in line " << e.line() << " and column " << e.column() << '\n';
}
catch(xml::parser::parse_error& e) {
std::cerr << "parse error: \"" << e.what() << "\"\n";
}
catch(std::exception& e) {
std::cerr << e.what() << '\n';
}
std::cin.sync();
std::cin.get();
}
data.xml:
<familie>
<eltern>
<vater>Peter</vater>
<mutter>Hanna</mutter>
</eltern>
<kinder>
<Paul>
<alter>23</alter>
<geschlecht>maennlich</geschlecht>
</Paul>
<Lisa>
<alter>14</alter>
<geschlecht>weiblich</geschlecht>
</Lisa>
<Raoule>
<alter>18</alter>
</Raoule>
</kinder>
</familie>
Vielen Dank!