mimic++ v9.2.1
Loading...
Searching...
No Matches
NameParser.hpp
Go to the documentation of this file.
1// Copyright Dominic (DNKpp) Koepke 2024 - 2025.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at
4// https://www.boost.org/LICENSE_1_0.txt)
5
6#ifndef MIMICPP_PRINTING_TYPE_NAME_PARSER_HPP
7#define MIMICPP_PRINTING_TYPE_NAME_PARSER_HPP
8
9#pragma once
10
11#include "mimic++/Fwd.hpp"
17
18#ifndef MIMICPP_DETAIL_IS_MODULE
19 #include <array>
20 #include <functional>
21 #include <iterator>
22 #include <type_traits>
23 #include <variant>
24#endif
25
27{
28 template <parser_visitor Visitor>
30 {
31 public:
32 [[nodiscard]]
33 explicit constexpr NameParser(Visitor visitor, StringViewT const& content) noexcept(std::is_nothrow_move_constructible_v<Visitor>)
34 : m_Visitor{std::move(visitor)},
35 m_Content{content},
36 m_Lexer{content}
37 {
38 }
39
40 constexpr void parse_type()
41 {
42 parse();
43 token::try_reduce_as_type(m_TokenStack);
44 if (!finalize<token::Type>())
45 {
46 emit_unrecognized();
47 }
48 }
49
// Parses the content as a function-name and reports the result to the visitor.
// If the token stack does not finalize as a single `token::Function`, a type interpretation is tried
// as a fallback; if that fails as well, the content is reported as unrecognized.
// NOTE(review): this listing lacks several lines of this function (the bodies of both branches below
// and the statement directly preceding the first `finalize` call) - consult the original header.
50 constexpr void parse_function()
51 {
52 parse();
53
54 if (m_HasConversionOperator)
55 {
57 }
58 else
59 {
62 }
63
65 if (!finalize<token::Function>())
66 {
67 // Well, this is a workaround to circumvent issues with lambdas on some environments.
68 // gcc produces lambdas in form `<lambda()>` which are not recognized as actual functions.
69 token::try_reduce_as_type(m_TokenStack);
70 if (!finalize<token::Type>())
71 {
72 emit_unrecognized();
73 }
74 }
75 }
76
77 private:
// Operators and punctuators the parser compares incoming lexer tokens against.
78 static constexpr lexing::operator_or_punctuator openingParens{"("};
79 static constexpr lexing::operator_or_punctuator closingParens{")"};
80 static constexpr lexing::operator_or_punctuator openingAngle{"<"};
81 static constexpr lexing::operator_or_punctuator closingAngle{">"};
82 static constexpr lexing::operator_or_punctuator openingCurly{"{"};
83 static constexpr lexing::operator_or_punctuator closingCurly{"}"};
84 static constexpr lexing::operator_or_punctuator openingSquare{"["};
85 static constexpr lexing::operator_or_punctuator closingSquare{"]"};
86 static constexpr lexing::operator_or_punctuator backtick{"`"};
87 static constexpr lexing::operator_or_punctuator singleQuote{"'"};
88 static constexpr lexing::operator_or_punctuator scopeResolution{"::"};
89 static constexpr lexing::operator_or_punctuator commaSeparator{","};
90 static constexpr lexing::operator_or_punctuator pointer{"*"};
91 static constexpr lexing::operator_or_punctuator lvalueRef{"&"};
92 static constexpr lexing::operator_or_punctuator rvalueRef{"&&"};
93 static constexpr lexing::operator_or_punctuator colon{":"};
94 static constexpr lexing::operator_or_punctuator leftShift{"<<"};
95 static constexpr lexing::operator_or_punctuator rightShift{">>"};
96 static constexpr lexing::operator_or_punctuator plus{"+"};
97 static constexpr lexing::operator_or_punctuator exclamationMark{"!"};
98 static constexpr lexing::operator_or_punctuator tilde{"~"};
// Keywords the parser compares incoming lexer tokens against.
99 static constexpr lexing::keyword operatorKeyword{"operator"};
100 static constexpr lexing::keyword constKeyword{"const"};
101 static constexpr lexing::keyword volatileKeyword{"volatile"};
102 static constexpr lexing::keyword noexceptKeyword{"noexcept"};
103 static constexpr lexing::keyword coAwaitKeyword{"co_await"};
104 static constexpr lexing::keyword newKeyword{"new"};
105 static constexpr lexing::keyword deleteKeyword{"delete"};
106 static constexpr lexing::keyword classKeyword{"class"};
107 static constexpr lexing::keyword structKeyword{"struct"};
108 static constexpr lexing::keyword enumKeyword{"enum"};
109
// Keywords which are treated as builtin type identifiers.
// A keyword from this collection is pushed as an identifier flagged as builtin, and two of them
// separated by a space are merged into a single identifier (e.g. `long long` or `unsigned int`).
110 static constexpr std::array typeKeywordCollection = {
111 lexing::keyword{"auto"},
112 lexing::keyword{"void"},
113 lexing::keyword{"bool"},
114 lexing::keyword{"char"},
115 lexing::keyword{"char8_t"},
116 lexing::keyword{"char16_t"},
117 lexing::keyword{"char32_t"},
118 lexing::keyword{"wchar_t"},
119 lexing::keyword{"double"},
120 lexing::keyword{"float"},
121 lexing::keyword{"int"},
122 lexing::keyword{"__int64"},
123 lexing::keyword{"long"},
124 lexing::keyword{"short"},
125 lexing::keyword{"signed"},
126 lexing::keyword{"unsigned"}};
127
// The visitor which receives the parsing result.
128 Visitor m_Visitor;
// The complete input; reported unchanged via the visitor's `unrecognized` callback when parsing fails.
129 StringViewT m_Content;
// Source of the lexer tokens; besides the main loop, several handlers peek at or consume tokens from it directly.
130 lexing::NameLexer m_Lexer;
// Set once an `operator` keyword is encountered which is not followed by a simple operator symbol.
131 bool m_HasConversionOperator{false};
132
// The parser tokens collected so far; handlers append to it and reductions operate on its suffix.
133 std::vector<Token> m_TokenStack{};
134
135 template <typename LexerTokenClass>
136 constexpr LexerTokenClass const* peek_if() const noexcept
137 {
138 return std::get_if<LexerTokenClass>(&m_Lexer.peek().classification);
139 }
140
141 constexpr void parse()
142 {
143 for (lexing::token next = m_Lexer.next();
144 !std::holds_alternative<lexing::end>(next.classification);
145 next = m_Lexer.next())
146 {
147 std::visit(
148 [&](auto const& tokenClass) { handle_lexer_token(next.content, tokenClass); },
149 next.classification);
150 }
151 }
152
153 template <token_type EndToken>
154 constexpr bool finalize()
155 {
156 if (1u == m_TokenStack.size())
157 {
158 if (auto const* const end = std::get_if<EndToken>(&m_TokenStack.back()))
159 {
160 auto& unwrapped = unwrap_visitor(m_Visitor);
161
162 unwrapped.begin();
163 std::invoke(*end, m_Visitor);
164 unwrapped.end();
165
166 return true;
167 }
168 }
169
170 return false;
171 }
172
173 constexpr void emit_unrecognized()
174 {
175 auto& unwrapped = unwrap_visitor(m_Visitor);
176 unwrapped.unrecognized(m_Content);
177 }
178
// Overload for the end-of-input classification.
// `parse()` leaves its loop as soon as it receives `lexing::end`, so this overload mainly exists to keep the
// `std::visit` dispatch well-formed for every alternative of the classification variant.
// NOTE(review): the body of this function is not visible in this listing - consult the original header.
179 static constexpr void handle_lexer_token([[maybe_unused]] StringViewT const content, [[maybe_unused]] lexing::end const& end)
180 {
182 }
183
184 [[nodiscard]]
185 constexpr bool merge_with_next_token() const noexcept
186 {
187 auto const* const keyword = peek_if<lexing::keyword>();
188
189 return keyword
190 && util::contains(typeKeywordCollection, *keyword);
191 }
192
193 constexpr void handle_lexer_token([[maybe_unused]] StringViewT const content, [[maybe_unused]] lexing::space const& space)
194 {
195 if (auto* const id = match_suffix<token::Identifier>(m_TokenStack))
196 {
197 // See, whether we need to merge the current builtin identifier with another one.
198 // E.g. `long long` or `unsigned int`.
199 if (id->is_builtin()
200 && merge_with_next_token())
201 {
202 auto& curContent = std::get<StringViewT>(id->content);
203 auto const [nextContent, _] = m_Lexer.next();
204 // Merge both keywords by simply treating them as contiguous content.
205 MIMICPP_ASSERT(curContent.data() + curContent.size() == content.data(), "Violated expectation.");
206 MIMICPP_ASSERT(content.data() + content.size() = nextContent.data(), "Violated expectation.");
207 curContent = StringViewT{
208 curContent.data(),
209 nextContent.data() + nextContent.size()};
210
211 return;
212 }
213
214 token::try_reduce_as_type(m_TokenStack);
215 }
216
217 // In certain cases, a space after an identifier has semantic significance.
218 // For example, consider the type names `void ()` and `foo()`:
219 // - `void ()` represents a function type returning `void`.
220 // - `foo()` represents a function named `foo`.
221 if (auto const* const nextOp = peek_if<lexing::operator_or_punctuator>();
222 nextOp
223 && util::contains(std::array{openingAngle, openingParens, openingCurly, singleQuote, backtick}, *nextOp))
224 {
225 m_TokenStack.emplace_back(token::Space{});
226 }
227 }
228
229 constexpr void handle_lexer_token([[maybe_unused]] StringViewT const content, lexing::identifier const& identifier)
230 {
231 m_TokenStack.emplace_back(
232 token::Identifier{.content = identifier.content});
233 }
234
235 constexpr void handle_lexer_token([[maybe_unused]] StringViewT const content, lexing::keyword const& keyword)
236 {
237 if (constKeyword == keyword)
238 {
239 auto& specs = token::get_or_emplace_specs(m_TokenStack);
240 MIMICPP_ASSERT(!specs.layers.empty(), "Zero spec layers detected.");
241 auto& top = specs.layers.back();
242 MIMICPP_ASSERT(!top.isConst, "Specs is already const.");
243 top.isConst = true;
244 }
245 else if (volatileKeyword == keyword)
246 {
247 auto& specs = token::get_or_emplace_specs(m_TokenStack);
248 MIMICPP_ASSERT(!specs.layers.empty(), "Zero spec layers detected.");
249 auto& top = specs.layers.back();
250 MIMICPP_ASSERT(!top.isVolatile, "Specs is already volatile.");
251 top.isVolatile = true;
252 }
253 else if (noexceptKeyword == keyword)
254 {
255 auto& specs = token::get_or_emplace_specs(m_TokenStack);
256 MIMICPP_ASSERT(!specs.isNoexcept, "Specs already is a noexcept.");
257 specs.isNoexcept = true;
258 }
259 else if (operatorKeyword == keyword && !process_simple_operator())
260 {
261 // Conversion operators can not be part of a scope, thus they can not appear multiple times in a single type-name.
262 MIMICPP_ASSERT(!m_HasConversionOperator, "Multiple conversion operators detected.");
263
264 m_TokenStack.emplace_back(token::OperatorKeyword{});
265 m_HasConversionOperator = true;
266 }
267 else if (constexpr std::array collection{classKeyword, structKeyword, enumKeyword};
268 util::contains(collection, keyword))
269 {
270 // This token is needed, so we do not accidentally treat e.g. `(anonymous class)` as function args,
271 // because otherwise there would just be the `anonymous` identifier left.
272 m_TokenStack.emplace_back(token::TypeContext{.content = content});
273 }
274 else if (util::contains(typeKeywordCollection, keyword))
275 {
276 m_TokenStack.emplace_back(
277 token::Identifier{
278 .isBuiltinType = true,
279 .content = content});
280 }
281 }
282
// Tries to consume the operator which follows an already consumed `operator` keyword.
// Returns `true`, if either an operator/punctuator symbol or one of the keywords `new`, `delete` or `co_await`
// follows; in that case the symbol is consumed and pushed as an operator identifier onto the token stack.
// Returns `false` otherwise, without pushing anything.
// Note: A space directly after the `operator` keyword is consumed in any case, even when `false` is returned.
283 constexpr bool process_simple_operator()
284 {
// Consumes the upcoming token, if (and only if) it is a space.
285 auto dropSpaceInput = [this] {
286 if (std::holds_alternative<lexing::space>(m_Lexer.peek().classification))
287 {
288 std::ignore = m_Lexer.next();
289 }
290 };
291
292 dropSpaceInput();
293
294 // As we assume valid input, we do not have to check for the actual symbol.
295 if (auto const next = m_Lexer.peek();
296 auto const* operatorToken = std::get_if<lexing::operator_or_punctuator>(&next.classification))
297 {
298 std::ignore = m_Lexer.next();
299
// Consumes the closing part of a two-part operator (`()` or `[]`) and pushes both parts as a single symbol.
// The symbol spans the opening content plus the length of the closing content, thus it expects both to be adjacent.
300 auto const finishMultiOpOperator = [&, this]([[maybe_unused]] lexing::operator_or_punctuator const& expectedClosingOp) {
301 auto const [closingContent, classification] = m_Lexer.next();
302 MIMICPP_ASSERT(lexing::token_class{expectedClosingOp} == classification, "Invalid input.");
303
304 StringViewT const content{
305 next.content.data(),
306 next.content.size() + closingContent.size()};
307 m_TokenStack.emplace_back(
308 token::Identifier{
309 .content = token::Identifier::OperatorInfo{.symbol = content}});
310 };
311
312 if (openingParens == *operatorToken)
313 {
314 finishMultiOpOperator(closingParens);
315 }
316 else if (openingSquare == *operatorToken)
317 {
318 finishMultiOpOperator(closingSquare);
319 }
320 // `operator <` and `operator <<` needs to be handled carefully, as it may come in as a template:
321 // `operator<<>` is actually `operator< <>`.
322 // Note: No tested c++ compiler actually allows `operator<<>`, but some environments still produce this.
323 else if (leftShift == *operatorToken)
324 {
325 dropSpaceInput();
326
327 if (auto const* const nextOp = peek_if<lexing::operator_or_punctuator>();
328 nextOp
329 // When next token starts a function or template, we know it's actually `operator <<`.
330 && (openingParens == *nextOp || openingAngle == *nextOp))
331 {
332 m_TokenStack.emplace_back(
333 token::Identifier{
334 .content = token::Identifier::OperatorInfo{.symbol = next.content}});
335 }
336 // looks like an `operator< <>`, so just treat both `<` separately.
337 else
338 {
339 m_TokenStack.emplace_back(
340 token::Identifier{
341 .content = token::Identifier::OperatorInfo{.symbol = next.content.substr(0u, 1u)}});
342 handle_lexer_token(next.content.substr(1u, 1u), openingAngle);
343 }
344 }
345 else
346 {
347 m_TokenStack.emplace_back(
348 token::Identifier{
349 .content = token::Identifier::OperatorInfo{.symbol = next.content}});
350 }
351
352 dropSpaceInput();
353
354 return true;
355 }
356 else if (auto const* keywordToken = std::get_if<lexing::keyword>(&next.classification);
357 keywordToken
358 && util::contains(std::array{newKeyword, deleteKeyword, coAwaitKeyword}, *keywordToken))
359 {
360 std::ignore = m_Lexer.next();
361
362 StringViewT content = next.content;
363
364 if (newKeyword == *keywordToken || deleteKeyword == *keywordToken)
365 {
366 dropSpaceInput();
367
368 if (auto const* const opAfter = peek_if<lexing::operator_or_punctuator>();
369 opAfter
370 && openingSquare == *opAfter)
371 {
372 // Strip `[]` or `[ ]` from the input.
373 std::ignore = m_Lexer.next();
374 dropSpaceInput();
375 auto const closing = m_Lexer.next();
376 MIMICPP_ASSERT(closingSquare == std::get<lexing::operator_or_punctuator>(closing.classification), "Invalid input.");
377
// The symbol covers everything from the keyword up to and including the closing `]`.
378 content = StringViewT{
379 next.content.data(),
380 closing.content.data() + closing.content.size()};
381 }
382 }
383
384 m_TokenStack.emplace_back(
385 token::Identifier{
386 .content = token::Identifier::OperatorInfo{.symbol = content}});
387
388 dropSpaceInput();
389
390 return true;
391 }
392
393 return false;
394 }
395
// Handles an operator or punctuator token.
// Depending on the concrete symbol, either a matching parser token is pushed onto the token stack
// (scope-resolution, comma, brackets, backtick, single-quote), the specs at the end of the stack are
// updated (`&`, `&&`, `*`), or some environment specific decoration is skipped (`+`, `!`).
// `<<` and `>>` are split and handled as two separate angles; `~` is merged with the following identifier.
// NOTE(review): this listing lacks many lines of this function (several branches appear empty or start
// without their condition) - consult the original header before changing anything in here.
396 constexpr void handle_lexer_token(StringViewT const content, lexing::operator_or_punctuator const& token)
397 {
398 if (scopeResolution == token)
399 {
401
402 m_TokenStack.emplace_back(
403 std::in_place_type<token::ScopeResolution>,
404 content);
406 }
407 else if (commaSeparator == token)
408 {
409 if (is_suffix_of<token::Type>(m_TokenStack)
410 || token::try_reduce_as_type(m_TokenStack))
411 {
413 }
414
415 m_TokenStack.emplace_back(
416 std::in_place_type<token::ArgSeparator>,
417 content);
418 }
419 else if (lvalueRef == token)
420 {
421 auto& specs = token::get_or_emplace_specs(m_TokenStack);
422 MIMICPP_ASSERT(token::Specs::Refness::none == specs.refness, "Specs already is a reference.");
423 specs.refness = token::Specs::Refness::lvalue;
424 }
425 else if (rvalueRef == token)
426 {
427 auto& specs = token::get_or_emplace_specs(m_TokenStack);
428 MIMICPP_ASSERT(token::Specs::Refness::none == specs.refness, "Specs already is a reference.");
429 specs.refness = token::Specs::Refness::rvalue;
430 }
431 else if (pointer == token)
432 {
433 auto& specs = token::get_or_emplace_specs(m_TokenStack);
434 specs.layers.emplace_back();
435 }
436 else if (openingAngle == token)
437 {
438 m_TokenStack.emplace_back(
439 std::in_place_type<token::OpeningAngle>,
440 content);
441 }
442 else if (closingAngle == token)
443 {
444 if (is_suffix_of<token::Type>(m_TokenStack)
445 || token::try_reduce_as_type(m_TokenStack))
446 {
448 }
449
450 m_TokenStack.emplace_back(
451 std::in_place_type<token::ClosingAngle>,
452 content);
455 }
456 else if (openingParens == token)
457 {
458 m_TokenStack.emplace_back(
459 std::in_place_type<token::OpeningParens>,
460 content);
461 }
462 else if (closingParens == token)
463 {
464 bool isNextOpeningParens{false};
465 if (auto const* const nextOp = peek_if<lexing::operator_or_punctuator>())
466 {
467 isNextOpeningParens = (openingParens == *nextOp);
468 }
469
470 // There can be no `(` directly after function-args, thus do not perform any reduction if such a token is found.
471 // This helps when function-ptrs are given, so that we do not accidentally reduce something like `(__cdecl*)` as function-args.
472 if (!isNextOpeningParens)
473 {
474 if (is_suffix_of<token::Type>(m_TokenStack)
475 || token::try_reduce_as_type(m_TokenStack))
476 {
478 }
479 }
480
481 m_TokenStack.emplace_back(
482 std::in_place_type<token::ClosingParens>,
483 content);
484
485 if (bool const result = isNextOpeningParens
488 !result)
489 {
491 }
492 }
493 else if (openingCurly == token)
494 {
495 m_TokenStack.emplace_back(
496 std::in_place_type<token::OpeningCurly>,
497 content);
498 }
499 else if (closingCurly == token)
500 {
501 m_TokenStack.emplace_back(
502 std::in_place_type<token::ClosingCurly>,
503 content);
505 }
506 else if (backtick == token)
507 {
508 m_TokenStack.emplace_back(
509 std::in_place_type<token::OpeningBacktick>,
510 content);
511 }
512 else if (singleQuote == token)
513 {
515 {
516 unwrap_msvc_like_function();
517 }
518 // Something like `id1::id2' should become id1::id2, so just remove the leading backtick.
520 {
521 m_TokenStack.erase(m_TokenStack.cend() - 3u);
522 }
523 else
524 {
525 m_TokenStack.emplace_back(
526 std::in_place_type<token::ClosingSingleQuote>,
527 content);
528 // Well, some environments wrap in `' (like msvc) and some wrap in '' (libc++).
531 }
532 }
533 // The current parsing process will never receive an `<<` or `>>` without a preceding `operator` keyword.
534 // As the current `operator` parsing currently consumes the next op-symbol, we will never reach this point
535 // with an actual left or right-shift. So, to make that easier, just split them.
536 else if (leftShift == token)
537 {
538 handle_lexer_token(content.substr(0, 1u), openingAngle);
539 handle_lexer_token(content.substr(1u, 1u), openingAngle);
540 }
541 else if (rightShift == token)
542 {
543 handle_lexer_token(content.substr(0, 1u), closingAngle);
544 handle_lexer_token(content.substr(1u, 1u), closingAngle);
545 }
546 // A `~` without a preceding `operator` keyword must be followed by a type-name and forms a destructor.
547 else if (tilde == token)
548 {
549 if (auto const* nextId = peek_if<lexing::identifier>())
550 {
551 StringViewT const merged{
552 content.data(),
553 nextId->content.data() + nextId->content.size()};
554 m_TokenStack.emplace_back(token::Identifier{.content = merged});
555 std::ignore = m_Lexer.next();
556 }
557 }
558 // The msvc c++23 `std::stacktrace` implementation adds `+0x\d+` to function identifiers.
559 // The only reason to receive a `+`-token without an `operator`-token is exactly that case.
560 // So, just ignore it and skip the next identifier.
561 else if (plus == token)
562 {
563 if (auto const* const nextId = peek_if<lexing::identifier>();
564 nextId
565 && nextId->content.starts_with("0x"))
566 {
567 std::ignore = m_Lexer.next();
568 }
569 }
570 // The msvc c++23 `std::stacktrace` implementation seems to add something which looks like the executable-name as prefix.
571 // The only reason to receive a `!`-token without an `operator`-token is exactly that case.
572 // So, just ignore it and skip the previous identifier.
573 else if (exclamationMark == token
574 && is_suffix_of<token::Identifier>(m_TokenStack))
575 {
576 m_TokenStack.pop_back();
577 }
578 }
579
580 void unwrap_msvc_like_function()
581 {
582 MIMICPP_ASSERT(is_suffix_of<token::FunctionIdentifier>(m_TokenStack), "Invalid state.");
583
584 auto funIdentifier = std::get<token::FunctionIdentifier>(m_TokenStack.back());
585 m_TokenStack.pop_back();
586
587 std::optional<token::ScopeSequence> scopes{};
588 if (auto* const scopeSeq = match_suffix<token::ScopeSequence>(m_TokenStack))
589 {
590 scopes = std::move(*scopeSeq);
591 m_TokenStack.pop_back();
592 }
593
594 // Ignore return-types.
595 if (is_suffix_of<token::Type>(m_TokenStack))
596 {
597 m_TokenStack.pop_back();
598 }
599
600 MIMICPP_ASSERT(match_suffix<token::OpeningBacktick>(m_TokenStack), "Invalid state.");
601 m_TokenStack.pop_back();
602
603 // As we gather spaces in front of backticks, there may be a space here, too.
604 if (is_suffix_of<token::Space>(m_TokenStack))
605 {
606 m_TokenStack.pop_back();
607 }
608
609 MIMICPP_ASSERT(!is_suffix_of<token::ScopeSequence>(m_TokenStack), "Invlid state.");
610 if (scopes)
611 {
612 m_TokenStack.emplace_back(*std::move(scopes));
613 }
614
615 m_TokenStack.emplace_back(std::move(funIdentifier));
616 }
617 };
618}
619
620#endif
#define MIMICPP_ASSERT(condition, msg)
Definition Config.hpp:51
constexpr NameParser(Visitor visitor, StringViewT const &content) noexcept(std::is_nothrow_move_constructible_v< Visitor >)
Definition NameParser.hpp:33
constexpr void parse_function()
Definition NameParser.hpp:50
constexpr void parse_type()
Definition NameParser.hpp:40
@ rvalue
Definition NameParserTokens.hpp:186
@ lvalue
Definition NameParserTokens.hpp:185
@ none
Definition NameParserTokens.hpp:184
constexpr WildcardMatcher _
The wildcard matcher, always matching.
Definition GeneralMatchers.hpp:289
std::variant< end, space, keyword, operator_or_punctuator, identifier > token_class
Definition NameLexer.hpp:190
constexpr bool try_reduce_as_placeholder_identifier_wrapped(TokenStack &tokenStack)
Definition NameParserReductions.hpp:320
bool try_reduce_as_function(TokenStack &tokenStack)
Definition NameParserReductions.hpp:655
bool try_reduce_as_type(TokenStack &tokenStack)
Definition NameParserReductions.hpp:648
constexpr bool try_reduce_as_template_identifier(TokenStack &tokenStack)
Definition NameParserReductions.hpp:191
MIMICPP_DETAIL_CONSTEXPR_VECTOR bool try_reduce_as_arg_sequence(TokenStack &tokenStack)
Definition NameParserReductions.hpp:161
void reduce_as_conversion_operator_function_identifier(TokenStack &tokenStack)
Definition NameParserReductions.hpp:689
bool try_reduce_as_scope_sequence(TokenStack &tokenStack)
Definition NameParserReductions.hpp:121
bool try_reduce_as_function_identifier(TokenStack &tokenStack)
Definition NameParserReductions.hpp:277
MIMICPP_DETAIL_CONSTEXPR_VECTOR bool try_reduce_as_function_context(TokenStack &tokenStack)
Definition NameParserReductions.hpp:232
bool try_reduce_as_function_ptr(TokenStack &tokenStack)
Definition NameParserReductions.hpp:401
constexpr Specs & get_or_emplace_specs(TokenStack &tokenStack)
Definition NameParserReductions.hpp:718
Definition NameParser.hpp:27
constexpr bool is_suffix_of(std::span< Token const > const tokenStack) noexcept
Definition NameParserReductions.hpp:51
constexpr auto match_suffix(std::span< Token > const tokenStack) noexcept
Definition NameParserReductions.hpp:61
constexpr auto & unwrap_visitor(Visitor &visitor) noexcept
Definition NameParserTokens.hpp:69
constexpr detail::contains_fn contains
Determines, whether the specified value is contained in the given range.
Definition Algorithm.hpp:370
void unreachable()
Invokes undefined behavior.
Definition C++23Backports.hpp:40
std::basic_string_view< CharT, CharTraitsT > StringViewT
Definition Fwd.hpp:392