GCC Code Coverage Report


Directory: libs/url/
File: boost/url/grammar/lut_chars.hpp
Date: 2024-04-08 19:38:36
Exec Total Coverage
Lines: 47 47 100.0%
Functions: 16 16 100.0%
Branches: 14 14 100.0%

Line Branch Exec Source
1 //
2 // Copyright (c) 2021 Vinnie Falco (vinnie dot falco at gmail dot com)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 //
7 // Official repository: https://github.com/boostorg/url
8 //
9
10 #ifndef BOOST_URL_GRAMMAR_LUT_CHARS_HPP
11 #define BOOST_URL_GRAMMAR_LUT_CHARS_HPP
12
13 #include <boost/url/detail/config.hpp>
14 #include <boost/url/grammar/detail/charset.hpp>
15 #include <cstdint>
16 #include <type_traits>
17
18 // Credit to Peter Dimov for ideas regarding
19 // SIMD constexpr, and character set masks.
20
21 namespace boost {
22 namespace urls {
23 namespace grammar {
24
25 #ifndef BOOST_URL_DOCS
26 namespace detail {
27 template<class T, class = void>
28 struct is_pred : std::false_type {};
29
30 template<class T>
31 struct is_pred<T, void_t<
32 decltype(
33 std::declval<bool&>() =
34 std::declval<T const&>().operator()(
35 std::declval<char>())
36 ) > > : std::true_type
37 {
38 };
39 } // detail
40 #endif
41
42 /** A set of characters
43
44 The characters defined by instances of
45 this set are provided upon construction.
46 The `constexpr` implementation allows
47 these to become compile-time constants.
48
49 @par Example
50 Character sets are used with rules and the
51 functions @ref find_if and @ref find_if_not.
52 @code
53 constexpr lut_chars vowel_chars = "AEIOU" "aeiou";
54
55 system::result< core::string_view > rv = parse( "Aiea", token_rule( vowel_chars ) );
56 @endcode
57
58 @see
59 @ref find_if,
60 @ref find_if_not,
61 @ref parse,
62 @ref token_rule.
63 */
64 class lut_chars
65 {
66 std::uint64_t mask_[4] = {};
67
68 constexpr
69 static
70 std::uint64_t
71 155693 lo(char c) noexcept
72 {
73 155693 return static_cast<
74 155693 unsigned char>(c) & 3;
75 }
76
77 constexpr
78 static
79 std::uint64_t
80 136682 hi(char c) noexcept
81 {
82 136682 return 1ULL << (static_cast<
83 136682 unsigned char>(c) >> 2);
84 }
85
86 constexpr
87 static
88 lut_chars
89 construct(
90 char const* s) noexcept
91 {
92 return *s
93 ? lut_chars(*s) +
94 construct(s+1)
95 : lut_chars();
96 }
97
98 constexpr
99 static
100 lut_chars
101 34048 construct(
102 unsigned char ch,
103 bool b) noexcept
104 {
105 return b
106 5824 ? lut_chars(ch)
107
4/4
✓ Branch 0 taken 5824 times.
✓ Branch 1 taken 28224 times.
✓ Branch 3 taken 84672 times.
✓ Branch 4 taken 28224 times.
118720 : lut_chars();
108 }
109
110 template<class Pred>
111 constexpr
112 static
113 lut_chars
114 68096 construct(
115 Pred pred,
116 unsigned char ch) noexcept
117 {
118 return ch == 255
119 266 ? construct(ch, pred(static_cast<char>(ch)))
120 67830 : construct(ch, pred(static_cast<char>(ch))) +
121
2/2
✓ Branch 0 taken 133 times.
✓ Branch 1 taken 33915 times.
136192 construct(pred, ch + 1);
122 }
123
124 constexpr
125 28224 lut_chars() = default;
126
127 constexpr
128 34105 lut_chars(
129 std::uint64_t m0,
130 std::uint64_t m1,
131 std::uint64_t m2,
132 std::uint64_t m3) noexcept
133 34105 : mask_{ m0, m1, m2, m3 }
134 {
135 34105 }
136
137 public:
138 /** Constructor
139
140 This function constructs a character
141 set whose only member is
142 the character `ch`.
143
144 @par Example
145 @code
146 constexpr lut_chars asterisk( '*' );
147 @endcode
148
149 @par Complexity
150 Constant.
151
152 @par Exception Safety
153 Throws nothing.
154
155 @param ch A character.
156 */
157 constexpr
158 6337 lut_chars(char ch) noexcept
159 6337 : mask_ {
160
2/2
✓ Branch 1 taken 1362 times.
✓ Branch 2 taken 4975 times.
6337 lo(ch) == 0 ? hi(ch) : 0,
161
2/2
✓ Branch 0 taken 1901 times.
✓ Branch 1 taken 4436 times.
6337 lo(ch) == 1 ? hi(ch) : 0,
162
2/2
✓ Branch 0 taken 1738 times.
✓ Branch 1 taken 4599 times.
6337 lo(ch) == 2 ? hi(ch) : 0,
163
2/2
✓ Branch 3 taken 1336 times.
✓ Branch 4 taken 5001 times.
19011 lo(ch) == 3 ? hi(ch) : 0 }
164 {
165 6337 }
166
167 /** Constructor
168
169 This function constructs a character
170 set whose members are all of the
171 characters present in the null-terminated
172 string `s`.
173
174 @par Example
175 @code
176 constexpr lut_chars digits = "0123456789";
177 @endcode
178
179 @par Complexity
180 Linear in `::strlen(s)`, or constant
181 if `s` is a constant expression.
182
183 @par Exception Safety
184 Throws nothing.
185
186 @param s A null-terminated string.
187 */
188 constexpr
189 lut_chars(
190 char const* s) noexcept
191 : lut_chars(construct(s))
192 {
193 }
194
195 /** Constructor.
196
197 This function constructs a character
198 set which has as members, every value
199 of `char ch` for which the expression
200 `pred(ch)` returns `true`.
201
202 @par Example
203 @code
204 struct is_digit
205 {
206 constexpr bool
207 operator()(char c ) const noexcept
208 {
209 return c >= '0' && c <= '9';
210 }
211 };
212
213 constexpr lut_chars digits( is_digit{} );
214 @endcode
215
216 @par Complexity
217 One call to `pred` for each of the 256 possible
218 character values, or constant if `pred(ch)` is a constant expression.
219
220 @par Exception Safety
221 Throws nothing.
222
223 @param pred The function object to
224 use for determining membership in
225 the character set.
226 */
227 template<class Pred
228 #ifndef BOOST_URL_DOCS
229 ,class = typename std::enable_if<
230 detail::is_pred<Pred>::value &&
231 ! std::is_base_of<
232 lut_chars, Pred>::value>::type
233 #endif
234 >
235 constexpr
236 266 lut_chars(Pred const& pred) noexcept
237 : lut_chars(
238 266 construct(pred, 0))
239 {
240 266 }
241
242 /** Return true if ch is in the character set.
243
244 This function returns true if the
245 character `ch` is in the set, otherwise
246 it returns false.
247
248 @par Complexity
249 Constant.
250
251 @par Exception Safety
252 Throws nothing.
253
254 @param ch The character to test.
255 */
256 constexpr
257 bool
258 1280 operator()(
259 unsigned char ch) const noexcept
260 {
261 1280 return operator()(static_cast<char>(ch));
262 }
263
264 /// @copydoc operator()(unsigned char) const
265 constexpr
266 bool
267 130345 operator()(char ch) const noexcept
268 {
269 130345 return mask_[lo(ch)] & hi(ch);
270 }
271
272 /** Return the union of two character sets.
273
274 This function returns a new character
275 set which contains all of the characters
276 in `cs0` as well as all of the characters
277 in `cs1`.
278
279 @par Example
280 This creates a character set which
281 includes all letters and numbers
282 @code
283 constexpr lut_chars alpha_chars(
284 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
285 "abcdefghijklmnopqrstuvwxyz");
286
287 constexpr lut_chars alnum_chars = alpha_chars + "0123456789";
288 @endcode
289
290 @par Complexity
291 Constant.
292
293 @return The new character set.
294
295 @param cs0 A character set to join
296
297 @param cs1 A character set to join
298 */
299 friend
300 constexpr
301 lut_chars
302 33919 operator+(
303 lut_chars const& cs0,
304 lut_chars const& cs1) noexcept
305 {
306 return lut_chars(
307 33919 cs0.mask_[0] | cs1.mask_[0],
308 33919 cs0.mask_[1] | cs1.mask_[1],
309 33919 cs0.mask_[2] | cs1.mask_[2],
310 33919 cs0.mask_[3] | cs1.mask_[3]);
311 }
312
313 /** Return a new character set by subtracting one character set from another
314
315 This function returns a new character
316 set which is formed from all of the
317 characters in `cs0` which are not in `cs1`.
318
319 @par Example
320 This statement declares a character set
321 containing all the lowercase letters
322 which are not vowels:
323 @code
324 constexpr lut_chars consonants = lut_chars("abcdefghijklmnopqrstuvwxyz") - "aeiou";
325 @endcode
326
327 @par Complexity
328 Constant.
329
330 @return The new character set.
331
332 @param cs0 The character set to subtract from.
333
334 @param cs1 The character set whose members are removed from `cs0`.
335 */
336 friend
337 constexpr
338 lut_chars
339 186 operator-(
340 lut_chars const& cs0,
341 lut_chars const& cs1) noexcept
342 {
343 return lut_chars(
344 186 cs0.mask_[0] & ~cs1.mask_[0],
345 186 cs0.mask_[1] & ~cs1.mask_[1],
346 186 cs0.mask_[2] & ~cs1.mask_[2],
347 186 cs0.mask_[3] & ~cs1.mask_[3]);
348 }
349
350 /** Return a new character set which is the complement of another character set.
351
352 This function returns a new character
353 set which contains all of the characters
354 that are not in `*this`.
355
356 @par Example
357 This statement declares a character set
358 containing everything but vowels:
359 @code
360 constexpr lut_chars not_vowels = ~lut_chars( "AEIOU" "aeiou" );
361 @endcode
362
363 @par Complexity
364 Constant.
365
366 @par Exception Safety
367 Throws nothing.
368
369 @return The new character set.
370 */
371 constexpr
372 lut_chars
373 operator~() const noexcept
374 {
375 return lut_chars(
376 ~mask_[0],
377 ~mask_[1],
378 ~mask_[2],
379 ~mask_[3]
380 );
381 }
382
383 #ifndef BOOST_URL_DOCS
384 #ifdef BOOST_URL_USE_SSE2
385 char const*
386 1603 find_if(
387 char const* first,
388 char const* last) const noexcept
389 {
390 1603 return detail::find_if_pred(
391 1603 *this, first, last);
392 }
393
394 char const*
395 13909 find_if_not(
396 char const* first,
397 char const* last) const noexcept
398 {
399 13909 return detail::find_if_not_pred(
400 13909 *this, first, last);
401 }
402 #endif
403 #endif
404 };
405
406 } // grammar
407 } // urls
408 } // boost
409
410 #endif
411