RESTinio
utf8_checker.hpp
Go to the documentation of this file.
1/*
2 * RESTinio
3 */
4
12#pragma once
13
15
16#include <cstdint>
17
18namespace restinio
19{
20
21namespace utils
22{
23
24//
25// utf8_checker_t
26//
27
35{
36public:
37 utf8_checker_t() = default;
38
// Feeds one byte of the input stream to the checker.
//
// NOTE(review): this is a rendered listing that skips several original
// line numbers inside this function (45, 53, 55, 57, 62, 67, 72, 77, 83,
// 89, 95, 101, 106, 109, 112). The condition selecting between the two
// outer branches, the state updates and the returned value are therefore
// not visible here -- confirm against the real header.
40 bool
41 process_byte( std::uint8_t byte ) noexcept
42 {
// check_overlong() sees the byte before any of the masking done below.
43 check_overlong( byte );
44
// First outer branch: the only visible test in it is the 10xxxxxx check,
// so it presumably handles bytes that follow a lead byte (the guarding
// condition on line 45 is omitted).
46 {
47 // check byte is 10xxxxxx.
48 if( (byte & 0xC0) == 0x80 )
49 {
// Shift the accumulated value left by 6 bits and keep only the low
// 6 bits of this byte; the statement that merges them is presumably
// on omitted line 53.
50 m_current_symbol <<= 6;
51 byte &= 0x3F;
52
54
56 {
58 }
59 }
60 else
61 {
// Byte is not of the form 10xxxxxx; the action taken (line 62) is omitted.
63 }
64 }
65 else
66 {
68
// Second outer branch: classify the byte by its leading bit pattern.
// Each branch with a mask keeps only the bits below the matched prefix.
69 if( (byte & 0x80) == 0x00)
70 {
71 // mask 0xxxxxxx
73 }
74 else if( (byte & 0xE0) == 0xC0)
75 {
76 // mask 110xxxxx
// Keep the low 5 bits.
78 byte &= 0x1F;
79 }
80 else if( (byte & 0xF0) == 0xE0)
81 {
82 // mask 1110xxxx
// Keep the low 4 bits.
84 byte &= 0xF;
85 }
86 else if( (byte & 0xF8) == 0xF0)
87 {
88 // mask 11110xxx
// Keep the low 3 bits.
90 byte &= 0x7;
91 }
// NOTE(review): the next two patterns are the 5- and 6-byte lead forms.
// RFC 3629 limits UTF-8 to 4 bytes, so how these end up being treated
// depends on lines not shown here -- confirm.
92 else if( (byte & 0xFC) == 0xF8)
93 {
94 // mask 111110xx
// Keep the low 2 bits.
96 byte &= 0x3;
97 }
98 else if( (byte & 0xFE) == 0xFC)
99 {
100 // mask 1111110x
// Keep the lowest bit.
102 byte &= 0x1;
103 }
104 else
105 {
// None of the patterns above matched; the action taken (line 106) is omitted.
107 }
108
110 }
111
113 }
114
// Returns true when m_current_symbol_rest_bytes is zero.
// Presumably this means no multi-byte symbol is left unfinished, i.e. the
// input did not stop in the middle of a sequence -- the member's
// declaration is not visible in this listing, so confirm.
119 bool
120 finalized() const noexcept
121 {
122 return m_current_symbol_rest_bytes == 0;
123 }
124
// NOTE(review): the body of reset() (original lines 128-129) is missing
// from this listing. Presumably it returns the checker to its initial
// state -- confirm against the real header.
125 void
126 reset() noexcept
127 {
130 }
131
// Returns the value currently stored in m_current_symbol.
// The value is returned as-is; this accessor performs no validity check.
133 std::uint32_t
134 current_symbol() const noexcept { return m_current_symbol; }
135
136private:
137
// NOTE(review): the signature line (original line 139) is missing from
// this listing; the index at the end of the page lists it as
// `void validate_current_symbol() noexcept`.
//
// Tests whether m_current_symbol lies in 0xD800..0xDFFF (the UTF-16
// surrogate range) or is 0x110000 or greater (past U+10FFFF, the last
// Unicode code point). The statement executed when the test succeeds
// (line 144) is omitted, so the resulting action cannot be confirmed here.
138 void
140 {
141 if( (m_current_symbol >= 0xD800 && m_current_symbol <= 0xDFFF) ||
142 (m_current_symbol >= 0x110000) )
143 {
145 }
146 }
147
// Inspects a byte for overlong-encoding patterns.
//
// NOTE(review): this listing omits many lines of this function (151, 152,
// 154, 156, 159, 162, 165, 167, 173, 177, 181, 185, 189), including the
// outer condition and every statement executed when a test matches. Only
// the tests themselves are visible; the resulting actions must be
// confirmed against the real header.
148 void
149 check_overlong( std::uint8_t byte ) noexcept
150 {
// First branch: each visible test pairs a value of
// m_current_symbol_rest_bytes with a progressively wider mask
// (0xE0, 0xF0, 0xF8, 0xFC) compared against 0x80, i.e. a byte of the form
// 100xxxxx, 1000xxxx, 10000xxx or 100000xx.
153 {
155 (byte & 0xE0) == 0x80 )
157 else if( m_current_symbol_rest_bytes == 3 &&
158 (byte & 0xF0) == 0x80 )
160 else if( m_current_symbol_rest_bytes == 4 &&
161 (byte & 0xF8) == 0x80 )
163 else if( m_current_symbol_rest_bytes == 5 &&
164 (byte & 0xFC) == 0x80 )
166 else
168 }
169 else
170 {
// Second branch: compares the byte with specific values. 0xE0, 0xF0,
// 0xF8 and 0xFC are the 1110xxxx / 11110xxx / 111110xx / 1111110x
// prefixes with every remaining bit zero; 0xC0 and 0xC1 are 110xxxxx
// bytes whose remaining bits are 00000 or 00001.
171 if( byte == 0xC0 || byte == 0xC1 )
172 {
174 }
175 else if( byte == 0xE0 )
176 {
178 }
179 else if( byte == 0xF0 )
180 {
182 }
// The next two tests are plain `if`, not `else if`. A byte holds a single
// value, so they are still mutually exclusive with the tests above.
183 if( byte == 0xF8 )
184 {
186 }
187 if( byte == 0xFC )
188 {
190 }
191 }
192 }
193
194 std::uint32_t m_current_symbol = 0u;
195
197
// States used by the checker.
// NOTE(review): the member that stores a state_t value (original line 206)
// and the statements that assign it are not visible in this listing, so
// the meanings below are inferred from the enumerator names -- confirm.
198 enum class state_t
199 {
// presumably: nothing wrong seen so far
200 valid,
// presumably: a malformed sequence or forbidden code point was seen
201 invalid,
// presumably: the start of the sequence could begin an overlong encoding
202 may_be_overlong,
// presumably: an overlong encoding was detected
203 overlong
204 };
205
207};
208
209} /* namespace utils */
210
211} /* namespace restinio */
212
Helper class for checking a UTF-8 byte sequence while parsing a URI or an incoming byte stream.
void validate_current_symbol() noexcept
RESTINIO_NODISCARD bool finalized() const noexcept
RESTINIO_NODISCARD bool process_byte(std::uint8_t byte) noexcept
void check_overlong(std::uint8_t byte) noexcept
RESTINIO_NODISCARD std::uint32_t current_symbol() const noexcept
Detection of compiler version and absence of various features.
#define RESTINIO_NODISCARD
unsigned int byte(digest_t::value_type v)
Definition: sha1.hpp:365
#define const
Definition: zconf.h:230