Suche Tester
-
Hallo!
Im Zuge der Ferien baue ich gerade einen rudimentären XML-Interpreter. Nun habe ich einen kleinen Parser, einen Lexer und eine Vorform eines DOM-Baumes fertiggestellt, die zusammen XML-Syntax der Form <root> <node>data</node> <node> <more>data</more> </node> </root>
interpretieren können (also erst mal ohne Attribute). Ich würde mich über Kritik oder Hinweise auf Bugs freuen, damit ich das Programm robust machen kann.
Ein Beispielprogramm:
#include <cctype>
#include <cstddef>
#include <exception>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <sstream>
#include <stack>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

namespace xml {

/// One lexical unit of the document: either a markup tag (`<name>` or
/// `</name>`) or a run of character data found between two tags.
class tag {
public:
    /// Creates a markup tag called `str`; `open` is true for `<str>` and
    /// false for `</str>`.
    tag(const std::string& str, bool open) : tagname_(str), value_(), open_(open) {}

    /// Creates a character-data unit holding `info`.
    /// Deliberately not `explicit`, so a std::string still converts implicitly.
    tag(const std::string& info) : tagname_(), value_(info), open_(false) {}

    // Copying is left to the compiler: the members are plain values, and a
    // hand-written copy constructor that skips fields would leave them
    // uninitialised.

    /// True for markup tags, false for character data.
    bool is_tag() const { return !tagname_.empty(); }

    /// Tag name; empty for character data.
    const std::string& name() const { return tagname_; }

    /// Character data; empty for markup tags.
    const std::string& value() const { return value_; }

    /// True for an opening tag. Always false for character data.
    bool open() const { return open_; }

private:
    std::string tagname_;
    std::string value_;
    bool open_;
};

/// Orders tags by name only, so that all children with the same tag name
/// form one equal range inside a std::multimap.
bool operator<(const tag& t1, const tag& t2) {
    return t1.name() < t2.name();
}

/// Splits the contents of a file stream into a flat sequence of tags and
/// character data. Attributes are not supported.
class lexer {
public:
    /// Thrown when the input is not lexically valid; carries the position
    /// (1-based line, 1-based column) of the offending character.
    class format_error : public std::runtime_error {
    public:
        format_error(const char* message, unsigned line, unsigned column)
            : std::runtime_error(message), line_(line), column_(column) {}

        unsigned line() const { return line_; }
        unsigned column() const { return column_; }

    private:
        unsigned line_;
        unsigned column_;
    };

    /// Reads `fs` from its current position to the end of the stream.
    ///
    /// Whitespace rules for character data: a run of blanks contributes a
    /// single ' ', while tabs, line breaks and other whitespace characters
    /// are dropped. Every other byte is kept verbatim.
    ///
    /// @throws format_error on whitespace or invalid characters inside a
    ///         tag, on an empty tag name, or when the input ends inside a tag.
    explicit lexer(std::fstream& fs) {
        typedef std::fstream::traits_type traits;

        skip_utf8_bom(fs);

        bool in_tag = false;        // currently between '<' and '>'
        bool open = true;           // current tag is an opening tag
        bool single_space = true;   // the next blank may still be stored
        unsigned line = 1;
        unsigned column = 0;
        std::string tagname;
        std::string content;

        // Compare against eof() *before* using the value: testing fs.eof()
        // in the loop head would process the EOF sentinel as a character.
        for (traits::int_type raw = fs.get();
             !traits::eq_int_type(raw, traits::eof());
             raw = fs.get()) {
            const char c = traits::to_char_type(raw);
            // The <cctype> functions require a value representable as
            // unsigned char; a negative char would be undefined behaviour.
            const int uc = static_cast<unsigned char>(c);
            ++column;

            if (std::isspace(uc)) {
                if (in_tag) {
                    throw format_error("unexpected whitespace in tag name found", line, column);
                }
                if (c == '\n') {
                    ++line;
                    column = 0;
                } else if (c == ' ' && single_space) {
                    content.push_back(c);
                    single_space = false;
                }
                continue;
            }
            single_space = true;

            if (!in_tag) {
                if (c == '<') {
                    // A tag starts: flush the text collected so far.
                    if (!content.empty()) {
                        tags_.push_back(tag(content));
                        content.clear();
                    }
                    in_tag = true;
                    open = true;
                } else {
                    content.push_back(c);
                }
                continue;
            }

            if (c == '>') {
                if (tagname.empty()) {
                    throw format_error("invalid tag name beginning", line, column);
                }
                tags_.push_back(tag(tagname, open));
                tagname.clear();
                in_tag = false;
            } else if (tagname.empty()) {
                // Directly after '<': an optional '/' and then a letter.
                if (c == '/' && open) {
                    open = false;
                } else if (std::isalpha(uc)) {
                    tagname.push_back(c);
                } else {
                    throw format_error("invalid tag name beginning", line, column);
                }
            } else if (std::isalnum(uc)) {
                tagname.push_back(c);
            } else {
                throw format_error("invalid tag name", line, column);
            }
        }

        if (in_tag) {
            throw format_error("unexpected end of file inside tag", line, column);
        }
        // Text after the last tag is handed on, so the parser can reject it
        // instead of it vanishing silently.
        if (!content.empty()) {
            tags_.push_back(tag(content));
        }
    }

    /// The lexed units in document order.
    const std::vector<tag>& tags() const { return tags_; }

private:
    /// Consumes a UTF-8 byte order mark if the stream starts with one;
    /// otherwise the read position is restored.
    static void skip_utf8_bom(std::fstream& fs) {
        const std::fstream::pos_type start = fs.tellg();
        char bom[3] = {0, 0, 0};
        fs.read(bom, 3);
        const bool has_bom = fs.gcount() == 3 &&
                             bom[0] == '\xEF' && bom[1] == '\xBB' && bom[2] == '\xBF';
        if (!has_bom) {
            fs.clear();   // a short file sets eofbit/failbit during read()
            fs.seekg(start);
        }
    }

    std::vector<tag> tags_;
};

/// An element of the document tree. A node owns its child nodes and
/// deletes them in its destructor.
class node {
public:
    typedef std::multimap<tag, node*> child_map;

    /// Non-owning, index-addressable view of all children that share one
    /// tag name.
    class element_selecter {
    public:
        /// Collects the mapped `node*` of every entry in [begin, end).
        template <typename In>
        element_selecter(In begin, In end) : elements_() {
            for (; begin != end; ++begin) {
                elements_.push_back(begin->second);
            }
        }

        /// Number of selected elements.
        std::size_t size() const { return elements_.size(); }

        /// @throws std::out_of_range if `index` is not smaller than size().
        node& operator[](unsigned index) const { return *elements_.at(index); }

    private:
        std::vector<node*> elements_;
    };

    /// Creates an element named by `t` below `parent` (null for the root).
    node(const tag& t, node* parent) : name_(t), data_(), children_(), parent_(parent) {}

    ~node() {
        for (child_map::iterator it = children_.begin(); it != children_.end(); ++it) {
            delete it->second;
        }
    }

    // A copy would share, and later double-delete, the child pointers.
    node(const node&) = delete;
    node& operator=(const node&) = delete;

    /// Appends a piece of character data to this element.
    void add_data(const tag& t) { data_.push_back(t); }

    /// Adds `n` as a child of this node and takes ownership of it.
    /// (Despite its historical name this inserts a child, not a sibling.)
    /// @throws std::invalid_argument if `n` is null.
    void add_sibling(node* n) {
        std::unique_ptr<node> guard(n);   // do not leak `n` if insert throws
        if (!n) {
            throw std::invalid_argument("cannot add a null child node");
        }
        children_.insert(std::make_pair(n->get_tag(), n));
        guard.release();
    }

    const tag& get_tag() const { return name_; }

    /// Parent element, or null for the root.
    node* get_parent() { return parent_; }

    /// Range of all direct children named `tagname`; empty if there are none.
    std::pair<child_map::iterator, child_map::iterator> find(const std::string& tagname) {
        return children_.equal_range(tag(tagname, true));
    }

    /// First piece of character data of a leaf element.
    /// @throws std::runtime_error if the element has children or no text.
    const std::string& data() const {
        if (!children_.empty() || data_.empty()) {
            std::stringstream ss;
            ss << "DOM error: no text value for <" << name_.name() << "> available";
            throw std::runtime_error(ss.str());
        }
        return data_[0].value();
    }

    /// Selects all direct children named `tagname`.
    /// @throws std::runtime_error if there is no such child.
    element_selecter operator[](const std::string& tagname) {
        const std::pair<child_map::iterator, child_map::iterator> range = find(tagname);
        if (range.first == range.second) {
            std::stringstream ss;
            ss << "error: \"tag <" << tagname << "> not found\"";
            throw std::runtime_error(ss.str());
        }
        return element_selecter(range.first, range.second);
    }

    /// Gives access to the element's own tag, e.g. `n->name()`.
    /// Returns a pointer because operator-> must yield something that can
    /// itself be dereferenced with `->`.
    const tag* operator->() const { return &name_; }

private:
    tag name_;
    std::vector<tag> data_;
    child_map children_;
    node* parent_;
};

/// Validates a tag sequence and turns it into a tree of nodes.
class parser {
public:
    /// Thrown when the tag sequence is not a well-formed document.
    class parse_error : public std::runtime_error {
    public:
        parse_error(const char* message) : std::runtime_error(message) {}
    };

    /// Copies `tags` and checks that they form exactly one properly nested
    /// root element, with nothing but blank text outside of it.
    /// @throws parse_error on mismatched, unexpected or missing closing
    ///         tags, on text outside the root element, on several root
    ///         elements, and on input without any element.
    explicit parser(const std::vector<tag>& tags) : tags_(tags) {
        std::stack<const tag*> open_tags;
        bool root_seen = false;

        for (std::size_t n = 0; n < tags_.size(); ++n) {
            const tag& t = tags_[n];

            if (!t.is_tag()) {
                if (open_tags.empty() && !is_blank(t.value())) {
                    throw parse_error("text outside of the root element found");
                }
                continue;
            }

            if (t.open()) {
                if (open_tags.empty()) {
                    if (root_seen) {
                        throw parse_error("multiple root elements found");
                    }
                    root_seen = true;
                }
                open_tags.push(&t);
                continue;
            }

            if (open_tags.empty()) {
                std::stringstream ss;
                ss << "tag mismatch: unexpected closing tag </" << t.name() << "> found";
                throw parse_error(ss.str().c_str());
            }
            if (open_tags.top()->name() != t.name()) {
                std::stringstream ss;
                ss << "tag mismatch: </" << open_tags.top()->name()
                   << "> expected, but </" << t.name() << "> found";
                throw parse_error(ss.str().c_str());
            }
            open_tags.pop();
        }

        if (!open_tags.empty()) {
            std::stringstream ss;
            ss << "closing tag </" << open_tags.top()->name() << "> expected, but not found";
            throw parse_error(ss.str().c_str());
        }
        if (!root_seen) {
            throw parse_error("no root element found");
        }
    }

    /// Builds the tree and returns its root. The caller owns the returned
    /// node and has to `delete` it.
    node* generate() {
        std::unique_ptr<node> root;   // frees a partial tree on exceptions
        node* current = nullptr;

        for (std::size_t n = 0; n < tags_.size(); ++n) {
            const tag& t = tags_[n];

            if (!t.is_tag()) {
                // Text outside the root was verified to be blank; drop it.
                if (current) {
                    current->add_data(t);
                }
                continue;
            }

            if (!t.open()) {
                // The constructor guarantees a matching open element.
                current = current->get_parent();
                continue;
            }

            if (!current) {
                root.reset(new node(t, nullptr));
                current = root.get();
            } else {
                node* child = new node(t, current);
                current->add_sibling(child);   // takes ownership of `child`
                current = child;
            }
        }
        return root.release();
    }

private:
    /// True if `text` consists of whitespace only (or is empty).
    static bool is_blank(const std::string& text) {
        for (std::size_t i = 0; i < text.size(); ++i) {
            if (!std::isspace(static_cast<unsigned char>(text[i]))) {
                return false;
            }
        }
        return true;
    }

    // Stored by value: a reference member would dangle if the parser
    // outlived the vector it was constructed from.
    std::vector<tag> tags_;
};

/// An XML file together with the element tree parsed from it.
class document {
public:
    /// Creates an empty document; call open() to load a file.
    document() : file_(), root_(nullptr) {}

    /// Opens and parses `filename`.
    /// @throws std::ios_base::failure if the file cannot be opened.
    /// @throws lexer::format_error / parser::parse_error on malformed input.
    explicit document(const std::string& filename) : file_(), root_(nullptr) {
        open(filename);
    }

    ~document() { delete root_; }

    document(const document&) = delete;
    document& operator=(const document&) = delete;

    /// Discards the current tree, then opens and parses `filename`.
    /// If this throws, the document is left without a tree.
    /// @throws std::ios_base::failure if the file cannot be opened.
    /// @throws lexer::format_error / parser::parse_error on malformed input.
    void open(const std::string& filename) {
        delete root_;
        root_ = nullptr;

        if (file_.is_open()) {
            file_.close();
        }
        file_.clear();
        // Read-only: asking for `out` as well would fail on write-protected
        // files, and `binary` on its own is not a valid open mode.
        file_.open(filename.c_str(), std::ios_base::in | std::ios_base::binary);
        if (!file_) {
            throw std::ios_base::failure("can't open XML file");
        }

        lexer lx(file_);
        parser prs(lx.tags());
        root_ = prs.generate();
    }

    /// Closes the underlying file; the parsed tree stays available.
    void close() { file_.close(); }

    friend std::ostream& operator<<(std::ostream&, document&);

    /// Selects the children of the root element that are named `index`.
    /// @throws std::runtime_error if no document is loaded or the root has
    ///         no such child.
    node::element_selecter operator[](const std::string& index) {
        if (!root_) {
            throw std::runtime_error("error: no document loaded");
        }
        const std::pair<node::child_map::iterator, node::child_map::iterator> range =
            root_->find(index);
        if (range.first == range.second) {
            std::stringstream ss;
            ss << "error: \"tag <" << index << "> not found\"";
            throw std::runtime_error(ss.str());
        }
        return node::element_selecter(range.first, range.second);
    }

private:
    std::fstream file_;
    node* root_;
};

/// Writes the raw text of the document's file to `os`, line by line.
std::ostream& operator<<(std::ostream& os, document& doc) {
    // Parsing left the stream at end-of-file with eofbit set; rewind first,
    // otherwise nothing would be printed.
    doc.file_.clear();
    doc.file_.seekg(0, std::ios_base::beg);

    std::string line;
    while (std::getline(doc.file_, line)) {
        os << line << '\n';
    }
    doc.file_.clear();   // reset the file stream, not the output stream
    return os;
}

}  // namespace xml

/// Demo: loads data.xml, asks for a child's name and prints that child's age.
int main() {
    try {
        xml::document doc("data.xml");
        std::string name;
        std::cout << "Bitte geben Sie den Namen des Kindes ein: ";
        std::cin >> name;
        std::cout << name << " ist " << doc["kinder"][0][name][0]["alter"][0].data() << ".";
    } catch (const std::ios_base::failure& e) {
        std::cerr << e.what() << '\n';
    } catch (const xml::lexer::format_error& e) {
        std::cerr << "format error: \"" << e.what() << "\" in line " << e.line()
                  << " and column " << e.column() << '\n';
    } catch (const xml::parser::parse_error& e) {
        std::cerr << "parse error: \"" << e.what() << "\"\n";
    } catch (const std::exception& e) {
        std::cerr << e.what() << '\n';
    }
    // Keep the console window open until the user presses return.
    std::cin.sync();
    std::cin.get();
}
data.xml:
<familie> <eltern> <vater>Peter</vater> <mutter>Hanna</mutter> </eltern> <kinder> <Paul> <alter>23</alter> <geschlecht>maennlich</geschlecht> </Paul> <Lisa> <alter>14</alter> <geschlecht>weiblich</geschlecht> </Lisa> <Raoule> <alter>18</alter> </Raoule> </kinder> </familie>
Vielen Dank!