mimic++ v9.2.1
Loading...
Searching...
No Matches
NameLexer.hpp
Go to the documentation of this file.
1// Copyright Dominic (DNKpp) Koepke 2024 - 2025.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at
4// https://www.boost.org/LICENSE_1_0.txt)
5
6#ifndef MIMICPP_PRINTING_TYPE_NAME_LEXER_HPP
7#define MIMICPP_PRINTING_TYPE_NAME_LEXER_HPP
8
9#pragma once
10
11#include "mimic++/Fwd.hpp"
14
15#ifndef MIMICPP_DETAIL_IS_MODULE
16 #include <algorithm>
17 #include <array>
18 #include <cctype>
19 #include <functional>
20 #include <span>
21 #include <tuple>
22 #include <variant>
23#endif
24
26{
// Predicate reporting whether `c` is classified as whitespace by `std::isspace`.
// The character is converted to `unsigned char` before the call, because handing a negative
// value (other than EOF) to `std::isspace` is undefined behavior.
// see: https://en.cppreference.com/w/cpp/string/byte/isspace
inline constexpr auto is_space = [](char const c) noexcept -> bool {
    auto const character = static_cast<unsigned char>(c);

    return 0 != std::isspace(character);
};
32
// Predicate reporting whether `c` is one of the decimal digits, as determined by `std::isdigit`.
// The character is converted to `unsigned char` before the call, because handing a negative
// value (other than EOF) to `std::isdigit` is undefined behavior.
// see: https://en.cppreference.com/w/cpp/string/byte/isdigit
inline constexpr auto is_digit = [](char const c) noexcept -> bool {
    auto const character = static_cast<unsigned char>(c);

    return 0 != std::isdigit(character);
};
38
39 namespace texts
40 {
41 // just list the noteworthy ones here
42 inline std::array constexpr visibilityKeywords = std::to_array<StringViewT>({"public", "protected", "private"});
43 inline std::array constexpr specKeywords = std::to_array<StringViewT>({"const", "constexpr", "volatile", "noexcept", "static"});
44 inline std::array constexpr contextKeywords = std::to_array<StringViewT>({"operator", "struct", "class", "enum"});
45 inline std::array constexpr typeKeywords = std::to_array<StringViewT>(
46 // The `__int64` keyword is used by msvc as an alias for `long long`.
47 {"auto", "void", "bool", "char", "char8_t", "char16_t", "char32_t", "wchar_t", "double", "float", "int", "long", "__int64", "short", "signed", "unsigned"});
48 inline std::array constexpr otherKeywords = std::to_array<StringViewT>({"new", "delete", "co_await"});
49 inline std::array constexpr digraphs = std::to_array<StringViewT>(
50 {"and", "or", "xor", "not", "bitand", "bitor", "compl", "and_eq", "or_eq", "xor_eq", "not_eq"});
51
52 inline std::array constexpr braceLikes = std::to_array<StringViewT>({"{", "}", "[", "]", "(", ")", "`", "'"});
53 inline std::array constexpr comparison = std::to_array<StringViewT>({"==", "!=", "<", "<=", ">", ">=", "<=>"});
54 inline std::array constexpr assignment = std::to_array<StringViewT>({"=", "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "<<=", ">>="});
55 inline std::array constexpr incOrDec = std::to_array<StringViewT>({"++", "--"});
56 inline std::array constexpr arithmetic = std::to_array<StringViewT>({"+", "-", "*", "/", "%"});
57 inline std::array constexpr bitArithmetic = std::to_array<StringViewT>({"~", "&", "|", "^", "<<", ">>"});
58 inline std::array constexpr logical = std::to_array<StringViewT>({"!", "&&", "||"});
59 inline std::array constexpr access = std::to_array<StringViewT>({".", ".*", "->", "->*"});
60 inline std::array constexpr specialAngles = std::to_array<StringViewT>({"<:", ":>", "<%", "%>"});
61 inline std::array constexpr rest = std::to_array<StringViewT>({"::", ";", ",", ":", "...", "?"});
62 }
63
64 [[nodiscard]]
65 consteval auto make_keyword_collection() noexcept
66 {
67 std::array collection = util::concat_arrays(
74
75 std::ranges::sort(collection);
76 MIMICPP_ASSERT(collection.cend() == std::ranges::unique(collection).begin(), "Fix your input!");
77
78 return collection;
79 }
80
81 [[nodiscard]]
83 {
84 // see: https://eel.is/c++draft/lex.operators#nt:operator-or-punctuator
85 std::array collection = util::concat_arrays(
96 std::ranges::sort(collection);
97 MIMICPP_ASSERT(collection.cend() == std::ranges::unique(collection).begin(), "Fix your input!");
98
99 return collection;
100 }
101
// Token classification used by `NameLexer::find_next` for a whitespace run that consists of exactly one blank (`" "`).
// The type has no data members; equality is compiler-generated.
102 struct space
103 {
104 [[nodiscard]]
105 bool operator==(space const&) const = default;
106 };
107
108 struct keyword
109 {
110 public:
111 static constexpr std::array textCollection = make_keyword_collection();
112
113 [[nodiscard]]
114 explicit constexpr keyword(StringViewT const& text) noexcept
115 : keyword{
116 std::ranges::distance(
117 textCollection.cbegin(),
119 {
120 }
121
122 [[nodiscard]]
123 explicit constexpr keyword(std::ptrdiff_t const keywordIndex) noexcept
124 : m_KeywordIndex{keywordIndex}
125 {
126 MIMICPP_ASSERT(0 <= m_KeywordIndex && m_KeywordIndex < std::ranges::ssize(textCollection), "Invalid keyword.");
127 }
128
129 [[nodiscard]]
130 constexpr StringViewT text() const noexcept
131 {
132 return textCollection[m_KeywordIndex];
133 }
134
135 [[nodiscard]]
136 bool operator==(keyword const&) const = default;
137
138 private:
139 std::ptrdiff_t m_KeywordIndex;
140 };
141
143 {
144 public:
146
147 [[nodiscard]]
148 explicit constexpr operator_or_punctuator(StringViewT const& text) noexcept
150 std::ranges::distance(
151 textCollection.cbegin(),
153 {
154 }
155
156 [[nodiscard]]
157 explicit constexpr operator_or_punctuator(std::ptrdiff_t const textIndex) noexcept
158 : m_TextIndex{textIndex}
159 {
160 MIMICPP_ASSERT(0 <= m_TextIndex && m_TextIndex < std::ranges::ssize(textCollection), "Invalid operator or punctuator.");
161 }
162
163 [[nodiscard]]
164 constexpr StringViewT text() const noexcept
165 {
166 return textCollection[m_TextIndex];
167 }
168
169 [[nodiscard]]
170 bool operator==(operator_or_punctuator const&) const = default;
171
172 private:
173 std::ptrdiff_t m_TextIndex;
174 };
175
177 {
179
180 [[nodiscard]]
181 bool operator==(identifier const&) const = default;
182 };
183
// Token classification used by `NameLexer::find_next` once the text to be lexed is exhausted.
// The type has no data members; equality is compiler-generated.
184 struct end
185 {
186 [[nodiscard]]
187 bool operator==(end const&) const = default;
188 };
189
190 using token_class = std::variant<
191 end,
192 space,
193 keyword,
195 identifier>;
196
202
204 {
205 public:
206 [[nodiscard]]
207 explicit constexpr NameLexer(StringViewT text) noexcept
208 : m_Text{std::move(text)},
209 m_Next{find_next()}
210 {
211 }
212
213 [[nodiscard]]
214 constexpr token next() noexcept
215 {
216 return std::exchange(m_Next, find_next());
217 }
218
219 [[nodiscard]]
220 constexpr token const& peek() const noexcept
221 {
222 return m_Next;
223 }
224
225 private:
226 StringViewT m_Text;
227 token m_Next;
228
// Scans the front of `m_Text` and returns the next token.
// Checks are made in this order: end (no text left), whitespace, operator/punctuator, keyword, identifier.
// The `next_as_*` helpers called here remove the characters they recognize from `m_Text`.
229 [[nodiscard]]
230 constexpr token find_next() noexcept
231 {
// Nothing left to scan: report `end` with an empty content range located at the end of the text.
232 if (m_Text.empty())
233 {
234 return token{
235 .content = {m_Text.cend(), m_Text.cend()},
236 .classification = end{}
237 };
238 }
239
240 if (is_space(m_Text.front()))
241 {
242 // Multiple consecutive spaces or any whitespace character other than a single space
243 // carry no meaningful semantic value beyond delimitation.
244 // Although single spaces may sometimes influence the result and sometimes not,
245 // complicating the overall process, we filter out all non-single whitespace characters here.
246 if (StringViewT const content = next_as_space();
247 " " == content)
248 {
249 return token{
250 .content = content,
251 .classification = space{}};
252 }
253
// The whitespace run was something other than one blank. `next_as_space` already removed it,
// so scanning simply continues with whatever follows.
254 return find_next();
255 }
256
// Candidates starting with the current character are handed to `next_as_op_or_punctuator`.
// NOTE(review): the first argument of this call (listing line 258) is missing from this extract -
// presumably `operator_or_punctuator::textCollection`; confirm against the real header.
257 if (auto const options = util::prefix_range(
259 m_Text.substr(0u, 1u)))
260 {
261 return next_as_op_or_punctuator(options);
262 }
263
264 StringViewT const content = next_as_identifier();
265 // As we do not perform any prefix-checks, we need to check now whether the token actually denotes a keyword.
266 if (auto const iter = util::binary_find(keyword::textCollection, content);
267 iter != keyword::textCollection.cend())
268 {
269 return token{
270 .content = content,
271 .classification = keyword{std::ranges::distance(keyword::textCollection.begin(), iter)}};
272 }
273
// Not found in the keyword collection: the text is reported as a plain identifier.
274 return token{
275 .content = content,
276 .classification = identifier{.content = content}};
277 }
278
279 [[nodiscard]]
280 constexpr StringViewT next_as_space() noexcept
281 {
282 auto const end = std::ranges::find_if_not(m_Text.cbegin() + 1, m_Text.cend(), is_space);
283 StringViewT const content{m_Text.cbegin(), end};
284 m_Text = StringViewT{end, m_Text.cend()};
285
286 return content;
287 }
288
// Removes an operator or punctuator from the front of `m_Text` and returns it as a token.
// Starting with the first character, the prefix is extended one character at a time while `options`
// still contains entries beginning with it; the last prefix that equals an entry exactly is the one consumed.
// `options`: the candidates to choose from. Its first entry is asserted to equal the first character of `m_Text`.
// NOTE(review): assumes `options` views into `operator_or_punctuator::textCollection` (the index is
// computed as a pointer distance into that array) - confirm at the call site.
293 [[nodiscard]]
294 constexpr token next_as_op_or_punctuator(std::span<StringViewT const> options) noexcept
295 {
296 MIMICPP_ASSERT(m_Text.substr(0u, 1u) == options.front(), "Assumption does not hold.");
297
// Narrows the current `options` to the entries starting with the first `n` characters of `m_Text`.
// Yields an empty range when fewer than `n` characters are left.
298 auto const try_advance = [&, this](std::size_t const n) {
299 if (n <= m_Text.size())
300 {
301 return util::prefix_range(
302 options,
303 StringViewT{m_Text.cbegin(), m_Text.cbegin() + n});
304 }
305
306 return std::ranges::subrange{options.end(), options.end()};
307 };
308
// The single-character prefix is the initial match (see the assertion above).
309 std::size_t length{1u};
310 StringViewT const* lastMatch = &options.front();
// Continue as long as at least one candidate starts with the prefix extended by one more character.
311 while (auto const nextOptions = try_advance(length + 1))
312 {
313 ++length;
314 options = {nextOptions.begin(), nextOptions.end()};
315
316 // If the first string is exactly the size of the prefix, it's a match.
// NOTE(review): this relies on the candidates being ordered so that an exact match comes first;
// the collections are sorted where they are built - confirm that `prefix_range` preserves that order.
317 if (auto const& front = options.front();
318 length == front.size())
319 {
320 lastMatch = &front;
321 }
322 }
323
324 MIMICPP_ASSERT(!options.empty(), "Invalid state.");
325 MIMICPP_ASSERT(lastMatch, "Invalid state.");
326
// `length` may have run ahead of the last exact match (a longer candidate shared the prefix but was
// not completed), so only `lastMatch` determines how many characters are consumed.
327 auto const index = std::ranges::distance(operator_or_punctuator::textCollection.data(), lastMatch);
328 StringViewT const content{m_Text.substr(0u, lastMatch->size())};
329 m_Text.remove_prefix(lastMatch->size());
330
331 return token{
332 .content = content,
333 .classification = operator_or_punctuator{index}};
334 }
335
346 [[nodiscard]]
347 constexpr StringViewT next_as_identifier() noexcept
348 {
349 auto const last = std::ranges::find_if_not(
350 m_Text.cbegin() + 1,
351 m_Text.cend(),
352 [](auto const c) {
353 return !is_space(c)
354 && !std::ranges::binary_search(operator_or_punctuator::textCollection, StringViewT{&c, 1u});
355 });
356
357 StringViewT const content{m_Text.cbegin(), last};
358 m_Text = {last, m_Text.cend()};
359
360 return content;
361 }
362 };
363}
364
365#endif
#define MIMICPP_ASSERT(condition, msg)
Definition Config.hpp:51
constexpr token next() noexcept
Definition NameLexer.hpp:214
constexpr token const & peek() const noexcept
Definition NameLexer.hpp:220
constexpr NameLexer(StringViewT text) noexcept
Definition NameLexer.hpp:207
Definition NameLexer.hpp:40
std::array constexpr incOrDec
Definition NameLexer.hpp:55
std::array constexpr assignment
Definition NameLexer.hpp:54
std::array constexpr contextKeywords
Definition NameLexer.hpp:44
std::array constexpr typeKeywords
Definition NameLexer.hpp:45
std::array constexpr rest
Definition NameLexer.hpp:61
std::array constexpr logical
Definition NameLexer.hpp:58
std::array constexpr otherKeywords
Definition NameLexer.hpp:48
std::array constexpr digraphs
Definition NameLexer.hpp:49
std::array constexpr specialAngles
Definition NameLexer.hpp:60
std::array constexpr arithmetic
Definition NameLexer.hpp:56
std::array constexpr comparison
Definition NameLexer.hpp:53
std::array constexpr braceLikes
Definition NameLexer.hpp:52
std::array constexpr specKeywords
Definition NameLexer.hpp:43
std::array constexpr visibilityKeywords
Definition NameLexer.hpp:42
std::array constexpr access
Definition NameLexer.hpp:59
std::array constexpr bitArithmetic
Definition NameLexer.hpp:57
Definition NameLexer.hpp:26
consteval auto make_operator_or_punctuator_collection() noexcept
Definition NameLexer.hpp:82
std::variant< end, space, keyword, operator_or_punctuator, identifier > token_class
Definition NameLexer.hpp:190
auto constexpr is_space
Definition NameLexer.hpp:28
auto constexpr is_digit
Definition NameLexer.hpp:34
consteval auto make_keyword_collection() noexcept
Definition NameLexer.hpp:65
constexpr std::ranges::borrowed_subrange_t< Range > prefix_range(Range &&range, Prefix &&prefix)
Returns a view containing all elements, which start with the given prefix.
Definition Algorithm.hpp:192
constexpr std::array< T, firstN+secondN > concat_arrays(std::array< T, firstN > const &first, std::array< T, secondN > const &second)
Concatenates the given arrays by copying all elements into a new array.
Definition Algorithm.hpp:218
constexpr detail::binary_find_fn binary_find
Finds the specified value within the container and returns an iterator pointing to it....
Definition Algorithm.hpp:361
std::basic_string_view< CharT, CharTraitsT > StringViewT
Definition Fwd.hpp:392
Definition NameLexer.hpp:185
bool operator==(end const &) const =default
bool operator==(identifier const &) const =default
StringViewT content
Definition NameLexer.hpp:178
Definition NameLexer.hpp:109
constexpr keyword(StringViewT const &text) noexcept
Definition NameLexer.hpp:114
constexpr keyword(std::ptrdiff_t const keywordIndex) noexcept
Definition NameLexer.hpp:123
constexpr StringViewT text() const noexcept
Definition NameLexer.hpp:130
bool operator==(keyword const &) const =default
static constexpr std::array textCollection
Definition NameLexer.hpp:111
bool operator==(operator_or_punctuator const &) const =default
constexpr StringViewT text() const noexcept
Definition NameLexer.hpp:164
static constexpr std::array textCollection
Definition NameLexer.hpp:145
constexpr operator_or_punctuator(std::ptrdiff_t const textIndex) noexcept
Definition NameLexer.hpp:157
constexpr operator_or_punctuator(StringViewT const &text) noexcept
Definition NameLexer.hpp:148
Definition NameLexer.hpp:103
bool operator==(space const &) const =default
Definition NameLexer.hpp:198
token_class classification
Definition NameLexer.hpp:200
StringViewT content
Definition NameLexer.hpp:199