1 module parsingstream;
2 
3 import std.conv;
4 import std.uni;
5 import std.exception;
6 
7 debug import std.stdio;
8 
/// This struct takes a string and lets you perform simple matching operations to help in writing simple parsers
/// and keep your code readable.
///
/// The stream keeps a cursor (`index`) into `subject`. Every successful match advances the cursor, a failed match
/// leaves it where it was.
struct ParsingStream(T = char) {

    alias TString = immutable(T)[];
    alias Checker = bool delegate(T);

    /// String we are operating on.
    TString subject;

    /// Current index, that is, the position of the next character to match.
    size_t index;

    /// Create the stream.
    ///
    /// Params:
    ///     subject = String to operate on.
    this(TString subject) {

        this.subject = subject;

    }

    /// Returns: true if there is still something left to parse.
    // Note: the template parameter is deliberately not called `T`, so it doesn't shadow the character type.
    bool opCast(Target : bool)() const {

        return index < subject.length;

    }

    // Stepping
    struct {

        /// Match the current character against the function, return true if matched and proceed to the next character.
        ///
        /// Params:
        ///     check = Function to match against.
        ///     character = Reference which will be replaced with the character. It receives the current character
        ///         even if the check rejects it; if the stream has ended, it is reset to `T.init`.
        /// Returns: true if the character matched and the stream has advanced, false otherwise.
        bool step(Checker check, out T character) {

            // If there is no character left, fail the match
            if (!this) return false;

            // Get the character
            character = subject[index];

            // Rejected, stay on the same character so the caller can try a different check
            if (!check(character)) return false;

            index += 1;
            return true;

        }

        /// ditto
        bool step(Checker check) {

            T ignore;
            return step(check, ignore);

        }

        ///
        static if (is(T == char))
        unittest {

            auto stream = parsingStream("hello");
            char ch;
            assert( stream.step(a => a == 'h'));  // First character
            assert(!stream.step(a => a == 'l'));  // Second character is an "e", won't match
            assert( stream.step(a => a == 'e'));  // Didn't progress, we can try again
            assert( stream.step(a => a == 'l'));  // Now this will match

            // Using a reference
            assert( stream.step(a => isAlpha(a), ch));
            assert(ch == 'l');

        }

        /// Match the current character against the function and return it.
        ///
        /// Params:
        ///     check = Function to match against.
        /// Returns: The matched character.
        /// Throws: MatchException if the character wasn't matched.
        T enforceStep(Checker check) {

            T character;

            // Try to match, throw an exception on failure
            enforce!MatchException(step(check, character), "Match failed.");

            // Return the character
            return character;

        }

        ///
        static if (is(T == char))
        unittest {

            auto stream = parsingStream("hello");

            // Matcher for "h" and "e"
            ParsingStream.Checker check = a => a == 'h' || a == 'e';

            assert(stream.enforceStep(check) == 'h');
            assert(stream.enforceStep(check) == 'e');
            assertThrown(stream.enforceStep(check));

        }

        /// A chainable version of this method. Matches the current character against the function and gives it via
        /// a reference argument.
        ///
        /// Returns: Self, for chaining.
        /// Params:
        ///     check = Function to match against.
        ///     match = Matched character (output).
        /// Throws: MatchException if the character wasn't matched.
        ref ParsingStream!T enforceStep(Checker check, out T match) {

            match = enforceStep(check);
            return this;

        }

        ///
        static if (is(T == char))
        unittest {

            auto stream = parsingStream("hi!");
            char a, b, c;
            ParsingStream.Checker check = x => x.isAlpha;

            stream
                .enforceStep(check, a)
                .enforceStep(check, b);

            assert(a == 'h');
            assert(b == 'i');

            // "!" is not a letter
            assertThrown!MatchException(stream.enforceStep(check, c));

        }

    }

    // Matching
    struct {

        /// Match all next characters against the function until it returns `false`.
        ///
        /// Params:
        ///     check = Function to match against.
        /// Returns: All matched characters, as a slice of the subject. Empty (`null`) if nothing matched.
        TString match(Checker check) {

            const start = index;

            // Step until failure; every successful step advances the index by exactly one character
            while (step(check)) { }

            // Nothing matched
            if (index == start) return null;

            // The matched characters are contiguous in the subject, so slice it instead of copying them one by one
            return subject[start .. index];

        }

        ///
        static if (is(T == char))
        unittest {

            auto stream = parsingStream("This is a sentence.");
            ParsingStream.Checker check = a => a.isAlpha;

            // Match whole words
            assert(stream.skip.match(check) == "This");
            assert(stream.skip.match(check) == "is");
            assert(stream.skip.match(check) != "not");
            assert(stream.skip.match(check) == "sentence");

            // The dot is not a letter, so nothing matches and the stream stays in place
            assert(stream.match(check) == "");
            assert(stream.step(a => a == '.'));

        }

        // The result shares memory with the subject
        static if (is(T == char))
        unittest {

            auto stream = parsingStream("word rest");
            auto word = stream.match(a => a.isAlpha);

            assert(word == "word");
            assert(word.ptr is stream.subject.ptr);

        }

        /// Match all next characters against the function until it returns `false`.
        ///
        /// Throws: MatchException if didn't match anything
        /// Params:
        ///     check = Function to match against.
        /// Returns: All matched characters.
        TString enforceMatch(Checker check) {

            auto result = match(check);
            enforce!MatchException(result.length > 0, "Empty match.");
            return result;

        }

        ///
        static if (is(T == char))
        unittest {

            auto stream = parsingStream("This is a sentence");
            ParsingStream.Checker check = a => a.isAlpha;


            assert(stream.enforceMatch(check) == "This");

            // Will fail, there is some whitespace before — remember to skip()!
            assertThrown(stream.enforceMatch(check));

            assert(stream.skip.enforceMatch(check) == "is");

        }

        /// A chainable version of the method. Matches all next characters against the function until it returns `false`.
        ///
        /// Throws: MatchException if didn't match anything
        /// Params:
        ///     check = Function to match against.
        ///     match = Matched string (output).
        /// Returns: Self, for chaining.
        ref ParsingStream!T enforceMatch(Checker check, out TString match) {

            match = enforceMatch(check);
            return this;

        }

        ///
        static if (is(T == char))
        unittest {

            auto stream = parsingStream("This is a sentence");
            ParsingStream.Checker check = a => a.isAlpha;
            string a, b, c;

            assertNotThrown(stream
                .skip.enforceMatch(check, a)
                .skip.enforceMatch(check, b)
            );

            assert(a == "This" && b == "is");

            // No .skip!
            assertThrown(stream.enforceMatch(check, c));

        }

    }

    // Matching until
    struct {

        /// Match all next characters against the function until it returns `true`
        ///
        /// The character the function returned `true` for is not consumed.
        ///
        /// Params:
        ///     check = Function to match against.
        /// Returns: All matched characters.
        TString matchUntil(Checker check) {

            return this.match(ch => !check(ch));

        }

        ///
        static if (is(T == char))
        unittest {

            auto stream = parsingStream("This is a sentence");

            assert(stream.skip.matchUntil(ch => ch == ' ') == "This");
            assert(stream.skip.matchUntil(ch => ch == ' ') == "is");
            assert(stream.skip.matchUntil(ch => ch == 'n') == "a se");
            assert(!stream.step(a => a == 't'));

        }

        /// Match all next characters against the function until it returns `true`
        ///
        /// Params:
        ///     check = Function to match against.
        /// Returns: All matched characters.
        /// Throws: MatchException if didn't match anything
        TString enforceUntil(Checker check) {

            auto match = matchUntil(check);
            enforce!MatchException(match.length > 0, "Empty match.");
            return match;

        }

        /// A chainable variant of the function. Match all next characters against the function until it returns `true`
        ///
        /// Params:
        ///     check = Function to match against.
        ///     match = Matched characters (output).
        /// Returns: Self, for chaining.
        /// Throws: MatchException if didn't match anything
        ref ParsingStream!T enforceUntil(Checker check, out TString match) {

            match = enforceUntil(check);
            return this;

        }

        ///
        static if (is(T == char))
        unittest {

            auto stream = parsingStream("key=value");
            string key;

            // Read everything up to the "=" sign, then step over it
            stream.enforceUntil(ch => ch == '=', key).skipOne();
            assert(key == "key");

            // The stream is at "v" now, so there is nothing to read before it
            assertThrown!MatchException(stream.enforceUntil(ch => ch == 'v'));

        }

    }

    // Skipping
    struct {

        /// Skip one character, without matching. Does nothing if the end of the subject has been reached.
        ///
        /// Returns: the stream, to allow chaining with other methods.
        ref ParsingStream!T skipOne() {

            // Never move the index past the end of the subject
            if (index < subject.length) index += 1;

            return this;

        }

        ///
        static if (is(T == char))
        unittest {

            auto stream = parsingStream("ab");

            // Skip the "a" regardless of what it is
            assert(stream.skipOne.step(a => a == 'b'));

            // Nothing left to skip, the stream stays at the end
            stream.skipOne();
            assert(stream.index == 2);

        }

        /// Skip all characters until one doesn't match, for built-in chars, Unicode whitespace is the default.
        ///
        /// Params:
        ///     check = Function to match against.
        /// Returns: the stream, to allow chaining with other methods.
        ref ParsingStream!T skip(Checker check) {

            match(check);
            return this;

        }

        static if (is(T == char) || is(T == wchar) || is(T == dchar)) {

            /// ditto
            ref ParsingStream!T skip() {

                match(a => isWhite(a));
                return this;

            }

        }

        /// Skip all characters until one matches.
        ///
        /// Params:
        ///     check = Function to match against.
        /// Returns: the stream, to allow chaining with other methods.
        ref ParsingStream!T skipUntil(Checker check) {

            matchUntil(check);
            return this;

        }

        /// Skip a single character if it matches.
        /// Params:
        ///     check = Function to match against.
        /// Returns: the stream, to allow chaining with other methods.
        ref ParsingStream!T skipStep(Checker check) {

            step(check);
            return this;

        }

        ///
        static if (is(T == char))
        unittest {

            auto stream = parsingStream("  white  = space(stuff)");

            assert(stream.skip().match(a => a.isAlpha) == "white");

        }

    }

}
393 
/// Convenience function wrapping a string in a `char`-based `ParsingStream`.
///
/// Params:
///     content = String the stream should operate on, empty by default.
/// Returns: A `ParsingStream!char` constructed from `content`.
ParsingStream!char parsingStream(string content = "") {

    alias Stream = typeof(return);

    return Stream(content);

}
400 
/// An exception thrown if a match fails.
class MatchException : Exception {

    /// Create a match exception.
    ///
    /// `file` and `line` default to the location of the code constructing the exception, so a plain
    /// `new MatchException("message")` points at the place it was thrown from rather than at this constructor.
    ///
    /// Params:
    ///     content = Message describing the failure.
    ///     file    = Source file the exception originates from.
    ///     line    = Line in that file the exception originates from.
    ///     next    = Optional exception this one is chained to.
    this(string content, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
        pure nothrow @nogc @safe
    {

        super(content, file, line, next);

    }

}