Orcus
 All Classes Functions Variables Enumerations Enumerator Pages
sax_parser_base.hpp
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6  */
7 
8 #ifndef INCLUDED_ORCUS_SAX_PARSER_BASE_HPP
9 #define INCLUDED_ORCUS_SAX_PARSER_BASE_HPP
10 
11 #include "env.hpp"
12 #include "pstring.hpp"
13 #include "cell_buffer.hpp"
14 #include "parser_global.hpp"
15 #include "parser_base.hpp"
16 
17 #include <cassert>
18 #include <cstdlib>
19 #include <exception>
20 #include <sstream>
21 #include <memory>
22 
23 #define ORCUS_DEBUG_SAX_PARSER 0
24 
25 #if ORCUS_DEBUG_SAX_PARSER
26 #include <iostream>
27 using std::cout;
28 using std::endl;
29 #endif
30 
31 namespace orcus { namespace sax {
32 
33 class ORCUS_PSR_DLLPUBLIC malformed_xml_error : public ::orcus::parse_error
34 {
35 public:
36  malformed_xml_error() = delete;
37  malformed_xml_error(const std::string& msg, std::ptrdiff_t offset);
38  virtual ~malformed_xml_error() throw();
39 };
40 
// NOTE(review): the line that opens this definition (listing line 45) is
// missing from this extract, so the name of the enclosing type is not
// visible here.  The member names suggest it describes an XML DOCTYPE
// declaration -- TODO confirm against the original header.
46 {
// Two possible keyword kinds; presumably the PUBLIC / private (SYSTEM)
// variants of a DOCTYPE declaration -- TODO confirm.
47  enum class keyword_type { dtd_public, dtd_private };
48 
// Which of the two keyword kinds applies to this instance.
49  keyword_type keyword;
// presumably the name of the document's root element -- TODO confirm.
50  pstring root_element;
// presumably the formal public identifier -- TODO confirm.
51  pstring fpi;
// presumably the URI of the DTD -- TODO confirm.
52  pstring uri;
53 };
54 
// Takes a character range (pointer p, length n) and returns a single char.
// NOTE(review): presumably maps the name of an XML-encoded character
// (the text of an entity such as "lt" or "amp") to the character it
// stands for -- the original doc comment is missing from this extract;
// confirm the exact input format and the value returned for unknown names
// against the implementation.
66 ORCUS_PSR_DLLPUBLIC char decode_xml_encoded_char(const char* p, size_t n);
67 
// Takes a character range (pointer p, length n) and returns a std::string.
// NOTE(review): presumably decodes a numeric character reference into its
// encoded byte sequence (hence a string rather than a single char) -- the
// original doc comment is missing from this extract; confirm the accepted
// input format and the output encoding against the implementation.
79 ORCUS_PSR_DLLPUBLIC std::string decode_xml_unicode_char(const char* p, size_t n);
80 
// Describes one parsed element: its optional namespace, its name, and the
// positions of its opening '<' and of the character after its closing '>'.
// NOTE(review): the line that opens this definition (listing line 85) is
// missing from this extract; this is presumably the parser_element type
// that element_name() takes further down in this file -- TODO confirm.
86 {
87  pstring ns; // element namespace (optional)
88  pstring name; // element name
89  const char* begin_pos; // position of the opening brace '<'.
90  const char* end_pos; // position of the char after the closing brace '>'.
91 };
92 
// Describes one parsed attribute: optional namespace, name, value, and a
// flag telling whether the value is held on a temporary buffer.
// NOTE(review): the line that opens this definition (listing line 100) is
// missing from this extract, so the type's name is not visible here --
// TODO confirm against the original header.
101 {
102  pstring ns; // attribute namespace (optional)
103  pstring name; // attribute name
104  pstring value; // attribute value
// When true the value lives on a temporary buffer; presumably it must be
// copied before that buffer is reused -- TODO confirm the lifetime rules.
105  bool transient; // whether or not the attribute value is on a temporary buffer.
106 };
107 
108 class ORCUS_PSR_DLLPUBLIC parser_base : public ::orcus::parser_base
109 {
110  struct impl;
111  std::unique_ptr<impl> mp_impl;
112 
113  parser_base() = delete;
114  parser_base(const parser_base&) = delete;
115  parser_base& operator=(const parser_base&) = delete;
116 protected:
117  size_t m_nest_level;
118  size_t m_buffer_pos;
119  bool m_root_elem_open:1;
120 
121 protected:
122  parser_base(const char* content, size_t size);
123  ~parser_base();
124 
125  void next_check()
126  {
127  next();
128  if (!has_char())
129  throw malformed_xml_error("xml stream ended prematurely.", offset());
130  }
131 
132  void nest_up() { ++m_nest_level; }
133  void nest_down()
134  {
135  assert(m_nest_level > 0);
136  --m_nest_level;
137  }
138 
139  void inc_buffer_pos();
140  void reset_buffer_pos() { m_buffer_pos = 0; }
141 
142  void has_char_throw(const char* msg) const
143  {
144  if (!has_char())
145  throw malformed_xml_error(msg, offset());
146  }
147 
155  inline size_t remains() const
156  {
157 #if ORCUS_DEBUG_SAX_PARSER
158  if (mp_char >= mp_end)
159  throw malformed_xml_error("xml stream ended prematurely.", offset());
160 #endif
161  return mp_end - mp_char;
162  }
163 
164  char cur_char_checked() const
165  {
166  if (!has_char())
167  throw malformed_xml_error("xml stream ended prematurely.", offset());
168 
169  return *mp_char;
170  }
171 
172  char next_and_char()
173  {
174  next();
175 #if ORCUS_DEBUG_SAX_PARSER
176  if (mp_char >= mp_end)
177  throw malformed_xml_error("xml stream ended prematurely.", offset());
178 #endif
179  return *mp_char;
180  }
181 
182  char next_char_checked()
183  {
184  next();
185  if (!has_char())
186  throw malformed_xml_error("xml stream ended prematurely.", offset());
187 
188  return *mp_char;
189  }
190 
191  cell_buffer& get_cell_buffer();
192 
193  void blank();
194  void comment();
195 
199  void skip_bom();
200 
201  void expects_next(const char* p, size_t n);
202 
203  void parse_encoded_char(cell_buffer& buf);
204  void value_with_encoded_char(cell_buffer& buf, pstring& str);
205 
214  bool value(pstring& str, bool decode);
215 
216  void name(pstring& str);
217  void element_name(parser_element& elem, const char* begin_pos);
218  void attribute_name(pstring& attr_ns, pstring& attr_name);
219  void characters_with_encoded_char(cell_buffer& buf);
220 };
221 
222 }}
223 
224 #endif
225 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: pstring.hpp:24
Definition: sax_parser_base.hpp:33
Definition: sax_parser_base.hpp:100
Definition: parser_base.hpp:34
Definition: sax_parser_base.hpp:85
Definition: sax_parser_base.hpp:45
size_t remains() const
Definition: sax_parser_base.hpp:155
Definition: parser_base.hpp:20
Definition: sax_parser_base.hpp:108