SourcePP
Several modern C++20 libraries for sanely parsing Valve's formats.
Loading...
Searching...
No Matches
Text.cpp
Go to the documentation of this file.
2
3#include <algorithm>
4#include <cctype>
5
6#include <BufferStream.h>
7
8using namespace sourcepp;
9
11 static const EscapeSequenceMap defaultEscapeSequences{
12 {'\'', '\''},
13 {'\"', '\"'},
14 {'?', '?'},
15 {'\\', '\\'},
16 {'a', '\a'},
17 {'b', '\b'},
18 {'f', '\f'},
19 {'n', '\n'},
20 {'r', '\r'},
21 {'t', '\t'},
22 {'v', '\v'},
23 };
24 return defaultEscapeSequences;
25}
26
28 static const EscapeSequenceMap noEscapeSequences{};
29 return useEscapes ? getDefaultEscapeSequences() : noEscapeSequences;
30}
31
33 return c == '\n' || c == '\r';
34}
35
36bool parser::text::isNewLine(std::string_view str) {
37 return std::ranges::all_of(str, [](char c) { return isNewLine(c); });
38}
39
41 return c == ' ' || c == '\a' || c == '\f' || c == '\t' || c == '\v' || isNewLine(c);
42}
43
44bool parser::text::isWhitespace(std::string_view str) {
45 return std::ranges::all_of(str, [](char c) { return isWhitespace(c); });
46}
47
49 return std::isdigit(c);
50}
51
52bool parser::text::isNumber(std::string_view str) {
53 return std::ranges::all_of(str, [](char c) { return isNumber(c); });
54}
55
56std::string parser::text::convertSpecialCharsToEscapes(std::string_view str, const EscapeSequenceMap& escapeSequences) {
57 // Reverse escape sequences map (assume that it's bidirectional)
58 EscapeSequenceMap specialSequences;
59 for (const auto& [normal, special] : escapeSequences) {
60 specialSequences[special] = normal;
61 }
62
63 std::string out;
64 for (int i = 0; i < str.length(); i++) {
65 if (specialSequences.contains(str[i])) {
66 out += '\\';
67 out += specialSequences[str[i]];
68 } else {
69 out += str[i];
70 }
71 }
72 return out;
73}
74
75std::string parser::text::convertEscapesToSpecialChars(std::string_view str, const EscapeSequenceMap& escapeSequences) {
76 std::string out;
77 for (int i = 0; i < str.length(); i++) {
78 if (!escapeSequences.empty() && str[i] == '\\' && i < str.length() - 1) {
79 auto n = str[i + 1];
80 if (escapeSequences.contains(n)) {
81 out += escapeSequences.at(n);
82 } else {
83 out += str[i];
84 out += str[i + 1];
85 }
86 } else {
87 out += str[i];
88 }
89 }
90 return out;
91}
92
93void parser::text::eatWhitespace(BufferStream& stream) {
94 while (isWhitespace(stream.read<char>())) {}
95 stream.seek(-1, std::ios::cur);
96}
97
98void parser::text::eatSingleLineComment(BufferStream& stream) {
99 while (!isNewLine(stream.read<char>())) {}
100}
101
102void parser::text::eatMultiLineComment(BufferStream& stream, std::string_view multiLineCommentEnd) {
103 while (!std::ranges::equal(stream.read_span<char>(multiLineCommentEnd.length()), multiLineCommentEnd)) {
104 stream.seek(-static_cast<int64_t>(multiLineCommentEnd.length() - 1), std::ios::cur);
105 }
106}
107
108void parser::text::eatWhitespaceAndSingleLineComments(BufferStream& stream, std::string_view singleLineCommentStart) {
109 return eatWhitespaceAndComments(stream, singleLineCommentStart, "");
110}
111
112void parser::text::eatWhitespaceAndMultiLineComments(BufferStream& stream, std::string_view multiLineCommentStart) {
113 return eatWhitespaceAndComments(stream, "", multiLineCommentStart);
114}
115
116// NOLINTNEXTLINE(*-no-recursion)
117void parser::text::eatWhitespaceAndComments(BufferStream& stream, std::string_view singleLineCommentStart, std::string_view multiLineCommentStart) {
118 eatWhitespace(stream);
119
120 if (!singleLineCommentStart.empty()) {
121 if (std::ranges::equal(stream.read_span<char>(singleLineCommentStart.length()), singleLineCommentStart)) {
122 eatSingleLineComment(stream);
123 eatWhitespaceAndComments(stream, singleLineCommentStart, multiLineCommentStart);
124 return;
125 }
126 stream.seek(-static_cast<int64_t>(singleLineCommentStart.length()), std::ios::cur);
127 }
128
129 if (!multiLineCommentStart.empty()) {
130 if (std::ranges::equal(stream.read_span<char>(multiLineCommentStart.length()), multiLineCommentStart)) {
131 eatMultiLineComment(stream);
132 eatWhitespaceAndComments(stream, singleLineCommentStart, multiLineCommentStart);
133 return;
134 }
135 stream.seek(-static_cast<int64_t>(multiLineCommentStart.length()), std::ios::cur);
136 }
137}
138
139bool parser::text::tryToEatChar(BufferStream& stream, char c) {
140 if (stream.peek<char>() != c) {
141 return false;
142 }
143 stream.skip();
144 return true;
145}
146
147std::string_view parser::text::readStringToBuffer(BufferStream& stream, BufferStream& backing, std::string_view start, std::string_view end, const EscapeSequenceMap& escapeSequences) {
148 const auto startSpan = backing.tell();
149
150 bool stopAtWhitespace = true;
151 char c = stream.read<char>();
152 if (start.find(c) != std::string_view::npos) {
153 stopAtWhitespace = false;
154 } else {
155 backing << c;
156 }
157
158 for (c = stream.read<char>(); (stopAtWhitespace && !isWhitespace(c)) || (!stopAtWhitespace && end.find(c) == std::string_view::npos); c = stream.read<char>()) {
159 if (!escapeSequences.empty() && c == '\\') {
160 auto n = stream.read<char>();
161 if (stopAtWhitespace && isWhitespace(n)) {
162 break;
163 }
164 if (escapeSequences.contains(n)) {
165 backing << escapeSequences.at(n);
166 } else if (!stopAtWhitespace && end.find(n) != std::string_view::npos) {
167 break;
168 } else {
169 backing << c << n;
170 }
171 } else {
172 backing << c;
173 }
174 }
175
176 backing << '\0';
177 return {reinterpret_cast<const char*>(backing.data()) + startSpan, backing.tell() - 1 - startSpan};
178}
179
180std::string_view parser::text::readUnquotedStringToBuffer(BufferStream& stream, BufferStream& backing, const EscapeSequenceMap& escapeSequences) {
181 return readStringToBuffer(stream, backing, "", "", escapeSequences);
182}
183
184std::string_view parser::text::readUnquotedStringToBuffer(BufferStream& stream, BufferStream& backing, std::string_view end, const EscapeSequenceMap& escapeSequences) {
185 const auto startSpan = backing.tell();
186
187 for (char c = stream.read<char>(); !isWhitespace(c) && end.find(c) == std::string_view::npos; c = stream.read<char>()) {
188 if (!escapeSequences.empty() && c == '\\') {
189 auto n = stream.read<char>();
190 if (escapeSequences.contains(n)) {
191 backing << escapeSequences.at(n);
192 } else if (isWhitespace(n) || end.find(n) != std::string_view::npos) {
193 break;
194 } else {
195 backing << c << n;
196 }
197 } else {
198 backing << c;
199 }
200 }
201
202 backing << '\0';
203 return {reinterpret_cast<const char*>(backing.data()) + startSpan, backing.tell() - 1 - startSpan};
204}
std::string convertSpecialCharsToEscapes(std::string_view str, const EscapeSequenceMap &escapeSequences)
Convert special characters like \n to escaped special characters like \\n.
Definition: Text.cpp:56
std::string convertEscapesToSpecialChars(std::string_view str, const EscapeSequenceMap &escapeSequences)
Convert escaped special characters like \\n to special characters like \n.
Definition: Text.cpp:75
std::unordered_map< char, char > EscapeSequenceMap
Definition: Text.h:17
void eatMultiLineComment(BufferStream &stream, std::string_view multiLineCommentEnd=DEFAULT_MULTI_LINE_COMMENT_END)
If a multi line comment is detected, eat its contents.
Definition: Text.cpp:102
bool isNewLine(char c)
If a char is a newline character.
Definition: Text.cpp:32
void eatSingleLineComment(BufferStream &stream)
If a single line comment is detected, eat its contents.
Definition: Text.cpp:98
void eatWhitespaceAndSingleLineComments(BufferStream &stream, std::string_view singleLineCommentStart=DEFAULT_SINGLE_LINE_COMMENT_START)
Eat all whitespace and single line comments after the current stream position.
Definition: Text.cpp:108
void eatWhitespace(BufferStream &stream)
Eat all whitespace after the current stream position.
Definition: Text.cpp:93
bool tryToEatChar(BufferStream &stream, char c)
If the given char exists at the current position, skip over it.
Definition: Text.cpp:139
const EscapeSequenceMap & getDefaultEscapeSequencesOrNone(bool useEscapes)
Definition: Text.cpp:27
std::string_view readUnquotedStringToBuffer(BufferStream &stream, BufferStream &backing, const EscapeSequenceMap &escapeSequences=getDefaultEscapeSequences())
Read a string starting at the current stream position.
Definition: Text.cpp:180
std::string_view readStringToBuffer(BufferStream &stream, BufferStream &backing, std::string_view start=DEFAULT_STRING_START, std::string_view end=DEFAULT_STRING_END, const EscapeSequenceMap &escapeSequences=getDefaultEscapeSequences())
Read a string starting at the current stream position.
Definition: Text.cpp:147
void eatWhitespaceAndComments(BufferStream &stream, std::string_view singleLineCommentStart=DEFAULT_SINGLE_LINE_COMMENT_START, std::string_view multiLineCommentStart=DEFAULT_MULTI_LINE_COMMENT_START)
Eat all whitespace and comments after the current stream position.
Definition: Text.cpp:117
bool isNumber(char c)
If a char is a numerical character (0-9).
Definition: Text.cpp:48
const EscapeSequenceMap & getDefaultEscapeSequences()
Definition: Text.cpp:10
void eatWhitespaceAndMultiLineComments(BufferStream &stream, std::string_view multiLineCommentStart=DEFAULT_MULTI_LINE_COMMENT_START)
Eat all whitespace and multi line comments after the current stream position.
Definition: Text.cpp:112
bool isWhitespace(char c)
If a char is a whitespace character.
Definition: Text.cpp:40
Definition: LZMA.h:11