Suche Tester



  • Hallo!
    Im Zuge der Ferien baue ich gerade einen rudimentären XML-Interpreter. Nun habe ich einen kleinen Parser, Lexer und eine Vorform eines DOM-Baumes fertiggestellt, der XML-Syntax der Form

    <root>
       <node>data</node>
       <node>
          <more>data</more>
       </node>
    </root>
    

    interpretieren kann (also erst mal ohne Attribute). Ich würde mich über Kritik oder Bugs freuen, damit ich das Programm robust machen kann.

    Ein Beispielprogramm:

    #include <cctype>
    #include <fstream>
    #include <iostream>
    #include <map>
    #include <memory>
    #include <sstream>
    #include <stack>
    #include <stdexcept>
    #include <string>
    #include <vector>
    
    namespace xml {
    
    	// A single lexer token: either a markup tag (non-empty name plus an
    	// open/close flag) or a run of character data (empty name, text in value()).
    	// A token whose name is empty is always treated as character data.
    	class tag {
    	public:
    		// Builds a markup tag; `open` is true for <str> and false for </str>.
    		tag(const std::string& str, bool open)
    			: tagname_(str), value_(), open_(open) { }
    
    		// Builds a character-data token carrying the text `info`.
    		tag(const std::string& info)
    			: tagname_(), value_(info), open_(false)
    		{ }
    
    		// Copying is deliberately left to the compiler-generated members.
    		// A hand-written copy constructor used to skip open_ for data tokens,
    		// which left the flag uninitialized in every copy of such a token.
    
    		// True for markup tags, false for character data.
    		bool is_tag() const { return !tagname_.empty(); }
    		// Tag name; empty for character data.
    		const std::string& name() const { return tagname_; }
    		// Character data; empty for markup tags.
    		const std::string& value() const { return value_; }
    		// True for an opening tag; always false for character data.
    		bool open() const { return open_; }
    	private:
    		std::string tagname_, value_;
    		bool open_;
    	};
    
    	// Strict weak ordering for tags, based on the tag name only. All
    	// character-data tokens (empty name) therefore compare equivalent.
    	bool operator <(const tag& t1, const tag& t2) {
    		const std::string& lhs = t1.name();
    		const std::string& rhs = t2.name();
    		return lhs.compare(rhs) < 0;
    	}
    
    	// Splits an XML character stream into a flat sequence of `tag` tokens.
    	//
    	// Supported syntax is limited to <name>, </name> and character data.
    	// Attributes, self-closing tags, comments and the <?xml ...?> prolog are
    	// rejected with a format_error.
    	class lexer {
    	public:
    		// Thrown when the input violates the supported syntax. Carries the
    		// line (starting at 1) and the number of characters read on that line.
    		// Derives from std::runtime_error because std::exception has no
    		// portable constructor taking a message.
    		class format_error : public std::runtime_error {
    		public:
    			format_error(const char* message, unsigned line, unsigned column)
    				: std::runtime_error(message), line_(line), column_(column)
    			{ }
    
    			unsigned line() const { return line_; }
    			unsigned column() const { return column_; }
    		private:
    			unsigned line_, column_;
    		};
    
    		// Reads `fs` until it is exhausted and stores the resulting tokens.
    		//
    		// Character data keeps every non-whitespace character. At most one
    		// space is kept between two non-whitespace characters; tabs, carriage
    		// returns and line feeds are dropped. Character data that follows the
    		// last tag is discarded.
    		//
    		// Throws format_error on malformed tags or on input ending inside a tag.
    		lexer(std::istream& fs) {
    			bool in_tag = false;       // true between '<' and '>'
    			bool open = false;         // current tag is an opening tag
    			bool single_space = true;  // a space may still be appended to content
    			unsigned line_counter = 1, column_counter = 0;
    			std::string tagname, content;
    			char c = 0;
    			// Loop on the success of the read itself, so the EOF sentinel is
    			// never processed as if it were a character.
    			while(fs.get(c)) {
    				++column_counter;
    				const bool whitespace = (c == ' ' || c == '\t' || c == '\n' || c == '\r');
    
    				if(in_tag) {
    					if(whitespace)
    						throw format_error("unexpected whitespace in tag name found", line_counter, column_counter);
    					if(c == '>') {
    						if(tagname.empty())
    							throw format_error("empty tag name", line_counter, column_counter);
    						tags_.push_back(tag(tagname, open));
    						tagname.clear();
    						in_tag = false;
    						single_space = true;
    					}
    					else if(c == '/') {
    						// Only valid once, directly after '<'; otherwise
    						// "<a/b>" would be read as the closing tag "</ab>".
    						if(!tagname.empty() || !open)
    							throw format_error("unexpected '/' in tag name", line_counter, column_counter);
    						open = false;
    					}
    					else if(std::isalnum(static_cast<unsigned char>(c))) {
    						// The cast keeps bytes above 127 from reaching isalnum
    						// as negative values.
    						tagname.push_back(c);
    					}
    					else {
    						throw format_error("invalid tag name", line_counter, column_counter);
    					}
    					continue;
    				}
    
    				if(whitespace) {
    					if(c == '\n') {
    						++line_counter;
    						column_counter = 0;
    					}
    					else if(c == ' ' && single_space) {
    						content.push_back(c);
    						single_space = false;
    					}
    					continue;
    				}
    
    				if(c == '<') {
    					// Character data collected so far becomes its own token.
    					if(!content.empty()) {
    						tags_.push_back(tag(content));
    						content.clear();
    					}
    					// peek() yields an unsigned char value or EOF, both of
    					// which are valid arguments for isalpha.
    					const int next = fs.peek();
    					if(next != '/' && !std::isalpha(next))
    						throw format_error("invalid tag name beginning", line_counter, column_counter + 1);
    					in_tag = true;
    					open = true;
    					continue;
    				}
    
    				content.push_back(c);
    				single_space = true;
    			}
    
    			if(in_tag)
    				throw format_error("unexpected end of input inside tag", line_counter, column_counter);
    		}
    
    		// Tokens in document order.
    		const std::vector<tag>& tags() const { return tags_; }
    
    	private:
    		std::vector<tag> tags_;
    	};
    
    	class node {
    	public:
    		class element_selecter {
    		public:
    		template <typename In>
    		element_selecter(In begin, In end)
    			: elements_(std::distance(begin, end))
    		{
    			for(unsigned n = 0; n < elements_.size(); ++n) {
    				elements_[n] = begin->second;
    				++begin;
    			}
    		}
    
    		node& operator [](unsigned index) const {
    			return *elements_.at(index);
    		}
    
    	private:
    		std::vector<node*> elements_;
    	};
    
    		node(const tag& tag, node* parent)
    			: name_(tag), data_(), children_(), parent_(parent) 
    		{ }
    
    		~node() {
    			std::multimap<tag, node*>::iterator end = children_.end();
    			for(auto iter = children_.begin(); iter != end; ++iter) {
    				delete iter->second;
    			}
    		}
    
    		void add_data(const tag& t) {
    			data_.push_back(t);
    		}
    
    		void add_sibling(node* n) {
    			children_.insert(std::make_pair(n->get_tag(), n));
    		}
    
    		const tag& get_tag() const { return name_; }
    		node* get_parent() { return parent_; }
    
    		std::pair<std::multimap<tag, node*>::iterator, std::multimap<tag, node*>::iterator> find(const std::string& tagname) {
    			return children_.equal_range(tag(tagname, true));
    		}
    
    		const std::string& data() {
    			if(!children_.empty() || data_.empty()) {
    				std::stringstream ss;
    				ss << "DOM error: no text value for <" << name_.name() << "> available";
    				throw std::exception(ss.str().c_str());
    			}
    			return data_[0].value();
    		}
    
    		element_selecter operator [](const std::string& tagname) {
    			auto iter_pair = find(tagname);
    			if(iter_pair.first == iter_pair.second || iter_pair.first == children_.end()) {
    				std::stringstream ss;
    				ss << "error: \"tag <" << tagname << "> not found\"";
    				throw std::exception(ss.str().c_str());
    			}
    			return element_selecter(iter_pair.first, iter_pair.second);
    		}
    
    		const tag& operator ->() {
    			return name_;
    		}
    
    	private:
    		tag name_;
    		std::vector<tag> data_;
    		std::multimap<tag, node*> children_;
    		node* parent_;
    	};
    
    	class parser {
    	public:
    		parser(const std::vector<tag>& tags)
    			: tags_(tags)
    		{
    			// Syntax-Check
    			std::stack<const tag*> tag_stack;
    			for(std::size_t n = 0; n < tags.size(); ++n) {
    				if(tags[n].is_tag()) {
    					if(tags[n].open()) {
    						tag_stack.push(&tags[n]);
    					}
    					else {
    						if(tag_stack.empty()) {
    							std::stringstream ss;
    							ss << "tag mismatch: unexpected closing tag </" << tags[n].name() << "> found";
    							throw parse_error(ss.str().c_str());
    						}
    						if(tag_stack.top()->name() != tags[n].name()) {
    							std::stringstream ss;
    							ss << "tag mismatch: </" << tag_stack.top()->name() << "> expected, but </" << tags[n].name() << "> found";
    							throw parse_error(ss.str().c_str());
    						}
    						tag_stack.pop();
    					}
    				}
    			}
    			if(!tag_stack.empty()) {
    				std::stringstream ss;
    				ss << "closing tag </" << tag_stack.top()->name() << "> expected, but not found";
    				throw parse_error(ss.str().c_str());
    			}
    		}
    
    		node* generate() {
    			node* root = new node(tags_[0], 0);
    			node* current = root;
    			for(unsigned n = 1; n < tags_.size(); ++n) {
    				if(tags_[n].is_tag()) { // nested tag found
    					if(tags_[n].open()) { // if this tag is open, it's a new one
    						node* next = new node(tags_[n], current);
    						current->add_sibling(next);
    						current = next;
    					}
    					else { // current tag must have been closed (parser checks this in constructor)
    						current = current->get_parent(); // back to parent tag
    					}
    				}
    				else { // it's data
    					current->add_data(tags_[n]);
    				}
    			}
    			return root;
    		}
    
    		class parse_error : public std::exception {
    		public:
    			parse_error(const char* message)
    				: std::exception(message)
    			{ }
    		};
    
    	private:
    		const std::vector<tag>& tags_;
    	};
    
    	class document {
    	public:
    		document() { }
    		document(const std::string& filename)
    			: file_(filename.c_str(), std::ios_base::binary | std::ios_base::in | std::ios_base::out), root_(0)
    		{
    			if(!file_)
    				throw std::ios_base::failure("can't open XML file");
    			lexer lx(file_);
    			parser prs(lx.tags());
    			root_ = prs.generate();
    		}
    
    		void open(const std::string& filename) {
    			file_.open(filename.c_str(), std::ios_base::binary);
    		}
    
    		void close() {
    			file_.close();
    		}
    
    		friend std::ostream& operator << (std::ostream&, document&);
    
    		~document() {
    			delete root_;
    		}
    
    		node::element_selecter operator [](const std::string& index) {
    			auto iter_pair = root_->find(index);
    			if(iter_pair.first == iter_pair.second) {
    				std::stringstream ss;
    				ss << "error: \"tag <" << index << "> not found\"";
    				throw std::exception(ss.str().c_str());
    			}
    			return node::element_selecter(iter_pair.first, iter_pair.second);
    		}
    
    	private:
    		std::fstream file_;
    		node* root_;
    	};
    
    	// Writes the raw text of the document's file to `os`, line by line.
    	// Nothing is written if the file is not open.
    	std::ostream& operator << (std::ostream& os, document& doc) {
    		// Parsing has already read the file to its end, so reset the stream
    		// state and rewind before reading it again.
    		doc.file_.clear();
    		doc.file_.seekg(0, std::ios_base::beg);
    		std::string line;
    		while(std::getline(doc.file_, line)) {
    			os << line << '\n';
    		}
    		// Clear the EOF/fail flags of the input file so it can be reused.
    		// The state of `os` is left alone so callers can detect write errors.
    		doc.file_.clear();
    		return os;
    	}
    
    }
    
    int main() {
    	try {
    		xml::document doc("data.xml");
    		std::string name;
    		std::cout << "Bitte geben Sie den Namen des Kindes ein: ";
    		std::cin >> name;
    		std::cout << name << " ist " << doc["kinder"][0][name][0]["alter"][0].data() << ".";
    	}
    	catch(std::ios_base::failure& e) {
    		std::cerr << e.what() << '\n';
    	}
    	catch(xml::lexer::format_error& e) {
    		std::cerr << "format error: \"" << e.what() << "\" in line " << e.line() << " and column " << e.column() << '\n';
    	}
    	catch(xml::parser::parse_error& e) {
    		std::cerr << "parse error: \"" << e.what() << "\"\n";
    	}
    	catch(std::exception& e) {
    		std::cerr << e.what() << '\n';
    	}
    	std::cin.sync();
    	std::cin.get();
    }
    

    data.xml:

    <familie>
      <eltern>
        <vater>Peter</vater>
        <mutter>Hanna</mutter>
      </eltern>
      <kinder>
        <Paul>
          <alter>23</alter>
          <geschlecht>maennlich</geschlecht>
        </Paul>
        <Lisa>
          <alter>14</alter>
          <geschlecht>weiblich</geschlecht>
        </Lisa>
        <Raoule>
          <alter>18</alter>
        </Raoule>
      </kinder>
    </familie>
    

    Vielen Dank! 🙂


Anmelden zum Antworten