module parsingstream;

import std.conv;
import std.uni;
import std.exception;

debug import std.stdio;

/// A lightweight cursor over an immutable string offering simple matching operations, meant to help in writing
/// small hand-rolled parsers while keeping the code readable.
///
/// Matching works on one element of type `T` at a time. For `char` and `wchar` streams this means a single code
/// unit, so checkers never see a multi-unit character as a whole.
struct ParsingStream(T = char) {

    /// Type of the string being parsed, and of the strings returned from matches.
    alias TString = immutable(T)[];

    /// Predicate used to test a single character.
    alias Checker = bool delegate(T);

    /// String we are operating on.
    TString subject;

    /// Current index.
    size_t index;

    /// Create the stream.
    this(TString subject) {

        this.subject = subject;

    }

    /// Returns: true if there is still something left to parse.
    bool opCast(B : bool)() const {

        return index < subject.length;

    }

    // Stepping
    struct {

        /// Match the current character against the function; if it matches, proceed to the next character.
        ///
        /// Params:
        ///     check     = Function to match against.
        ///     character = Receives the current character, whether or not it matched. Left at `T.init` if there is
        ///         nothing left to read.
        /// Returns: true if the character matched and the stream advanced, false otherwise.
        bool step(Checker check, out T character) {

            // If there is no character left, fail the match
            if (index >= subject.length) return false;

            character = subject[index];

            // Stay in place on a failed match, so the caller can retry with another checker
            if (!check(character)) return false;

            index += 1;
            return true;

        }

        /// ditto
        bool step(Checker check) {

            T ignore;
            return step(check, ignore);

        }

        ///
        static if (is(T == char))
        unittest {

            auto stream = parsingStream("hello");
            char ch;
            assert( stream.step(a => a == 'h'));  // First character
            assert(!stream.step(a => a == 'l'));  // Second character is an "e", won't match
            assert( stream.step(a => a == 'e'));  // Didn't progress, we can try again
            assert( stream.step(a => a == 'l'));  // Now this will match

            // Using a reference
            assert( stream.step(a => isAlpha(a), ch));
            assert(ch == 'l');

        }

        /// Match the current character against the function and return it.
        ///
        /// Params:
        ///     check = Function to match against.
        /// Returns: The matched character.
        /// Throws: MatchException if the character wasn't matched, or if there was no character left.
        T enforceStep(Checker check) {

            T character;

            // Throw an exception if the step fails
            enforce!MatchException(step(check, character), "Match failed.");

            return character;

        }

        ///
        static if (is(T == char))
        unittest {

            auto stream = parsingStream("hello");

            // Matcher for "h" and "e"
            ParsingStream.Checker check = a => a == 'h' || a == 'e';

            assert(stream.enforceStep(check) == 'h');
            assert(stream.enforceStep(check) == 'e');
            assertThrown(stream.enforceStep(check));

        }

        /// A chainable version of this method. Matches the current character against the function and gives it via
        /// a reference argument.
        ///
        /// Returns: Self, for chaining.
        /// Params:
        ///     check = Function to match against.
        ///     match = Matched character (output).
        /// Throws: MatchException if the character wasn't matched.
        ref ParsingStream!T enforceStep(Checker check, out T match) {

            match = enforceStep(check);
            return this;

        }

        ///
        static if (is(T == char))
        unittest {

            auto stream = parsingStream("hi!");
            char a, b, c;
            ParsingStream.Checker check = x => x.isAlpha;

            stream
                .enforceStep(check, a)
                .enforceStep(check, b);

            assert(a == 'h');
            assert(b == 'i');

            // "!" is not a letter
            assertThrown!MatchException(stream.enforceStep(check, c));

        }

    }

    // Matching
    struct {

        /// Match all next characters against the function until it returns `false`.
        ///
        /// Params:
        ///     check = Function to match against.
        /// Returns: All matched characters, as a slice of `subject`; `null` if nothing matched.
        TString match(Checker check) {

            const start = index;

            // Step until failure
            while (step(check)) { }

            // The matched characters form a contiguous run of the immutable subject, so it can be sliced
            // directly instead of being copied character by character. An empty match yields `null`, just
            // like an array nothing was ever appended to.
            return index > start
                ? subject[start .. index]
                : null;

        }

        ///
        static if (is(T == char))
        unittest {

            auto stream = parsingStream("This is a sentence.");
            ParsingStream.Checker check = a => a.isAlpha;

            // Match whole words
            assert(stream.skip.match(check) == "This");
            assert(stream.skip.match(check) == "is");
            assert(stream.skip.match(check) != "not");
            assert(stream.skip.match(check) == "sentence");

        }

        /// Match all next characters against the function until it returns `false`.
        ///
        /// Throws: MatchException if didn't match anything
        /// Params:
        ///     check = Function to match against.
        /// Returns: All matched characters.
        TString enforceMatch(Checker check) {

            auto result = match(check);
            enforce!MatchException(result.length > 0, "Empty match.");
            return result;

        }

        ///
        static if (is(T == char))
        unittest {

            auto stream = parsingStream("This is a sentence");
            ParsingStream.Checker check = a => a.isAlpha;

            assert(stream.enforceMatch(check) == "This");

            // Will fail, there is some whitespace before — remember to skip()!
            assertThrown(stream.enforceMatch(check));

            assert(stream.skip.enforceMatch(check) == "is");

        }

        /// A chainable version of the method. Matches all next characters against the function until it returns
        /// `false`.
        ///
        /// Throws: MatchException if didn't match anything
        /// Params:
        ///     check = Function to match against.
        ///     match = Matched string (output).
        /// Returns: Self, for chaining.
        ref ParsingStream!T enforceMatch(Checker check, out TString match) {

            match = enforceMatch(check);
            return this;

        }

        ///
        static if (is(T == char))
        unittest {

            auto stream = parsingStream("This is a sentence");
            ParsingStream.Checker check = a => a.isAlpha;
            string a, b, c;

            assertNotThrown(stream
                .skip.enforceMatch(check, a)
                .skip.enforceMatch(check, b)
            );

            assert(a == "This" && b == "is");

            // No .skip!
            assertThrown(stream.enforceMatch(check, c));

        }

    }

    // Matching until
    struct {

        /// Match all next characters against the function until it returns `true`
        ///
        /// Params:
        ///     check = Function to match against.
        /// Returns: All characters preceding the first one accepted by `check`.
        TString matchUntil(Checker check) {

            return this.match(ch => !check(ch));

        }

        ///
        static if (is(T == char))
        unittest {

            auto stream = parsingStream("This is a sentence");

            assert(stream.skip.matchUntil(ch => ch == ' ') == "This");
            assert(stream.skip.matchUntil(ch => ch == ' ') == "is");
            assert(stream.skip.matchUntil(ch => ch == 'n') == "a se");

            // The stream stopped at the "n", it wasn't consumed
            assert(!stream.step(a => a == 't'));

        }

        /// Match all next characters against the function until it returns `true`
        ///
        /// Params:
        ///     check = Function to match against.
        /// Returns: All matched characters.
        /// Throws: MatchException if didn't match anything
        TString enforceUntil(Checker check) {

            auto result = matchUntil(check);
            enforce!MatchException(result.length > 0, "Empty match.");
            return result;

        }

        /// A chainable variant of the function. Match all next characters against the function until it returns
        /// `true`
        ///
        /// Params:
        ///     check = Function to match against.
        ///     match = Matched characters (output).
        /// Returns: Self, for chaining.
        /// Throws: MatchException if didn't match anything
        ref ParsingStream!T enforceUntil(Checker check, out TString match) {

            match = enforceUntil(check);
            return this;

        }

        ///
        static if (is(T == char))
        unittest {

            auto stream = parsingStream("key=value");
            string key;

            stream.enforceUntil(ch => ch == '=', key);
            assert(key == "key");

            // The stream stopped right at the "=", so there is nothing to match before it
            assertThrown!MatchException(stream.enforceUntil(ch => ch == '='));

        }

    }

    // Skipping
    struct {

        /// Skip one character, without matching. Does nothing if the end of the stream has been reached, so
        /// `index` never exceeds `subject.length`.
        ///
        /// Returns: the stream, to allow chaining with other methods.
        ref ParsingStream!T skipOne() {

            if (index < subject.length) index += 1;
            return this;

        }

        ///
        static if (is(T == char))
        unittest {

            auto stream = parsingStream("ab");

            stream.skipOne().skipOne();
            assert(!stream);

            // Skipping at the end is a no-op
            stream.skipOne();
            assert(stream.index == 2);

        }

        /// Skip all characters until one doesn't match. For built-in character types, the default is to skip
        /// whatever `std.uni.isWhite` accepts.
        ///
        /// Note that the check is applied to each element of the string separately; in a `char` or `wchar` stream,
        /// whitespace characters encoded with more than one code unit are not recognized as such.
        ///
        /// Params:
        ///     check = Function to match against.
        /// Returns: the stream, to allow chaining with other methods.
        ref ParsingStream!T skip(Checker check) {

            match(check);
            return this;

        }

        static if (is(T == char) || is(T == wchar) || is(T == dchar)) {

            /// ditto
            ref ParsingStream!T skip() {

                match(a => isWhite(a));
                return this;

            }

        }

        /// Skip all characters until one matches.
        ///
        /// Params:
        ///     check = Function to match against.
        /// Returns: the stream, to allow chaining with other methods.
        ref ParsingStream!T skipUntil(Checker check) {

            matchUntil(check);
            return this;

        }

        /// Skip a single character if it matches.
        ///
        /// Params:
        ///     check = Function to match against.
        /// Returns: the stream, to allow chaining with other methods.
        ref ParsingStream!T skipStep(Checker check) {

            step(check);
            return this;

        }

        ///
        static if (is(T == char))
        unittest {

            auto stream = parsingStream(" white = space(stuff)");

            assert(stream.skip().match(a => a.isAlpha) == "white");

        }

    }

}

/// Create a `ParsingStream!char` operating on the given string.
///
/// Params:
///     content = String to parse; empty by default.
/// Returns: A stream positioned at the start of `content`.
ParsingStream!char parsingStream(string content = "") {

    return ParsingStream!char(content);

}

/// An exception thrown if a match fails.
/// Thrown by the `enforce*` methods of `ParsingStream` when the input doesn't match.
class MatchException : Exception {

    /// Create a match exception.
    ///
    /// `file` and `line` default to the location of the code constructing the exception, so that an exception
    /// created with just a message reports where it was raised rather than a line inside this constructor.
    ///
    /// Params:
    ///     content = Message describing the failed match.
    ///     file    = Source file to report.
    ///     line    = Source line to report.
    ///     next    = Optional exception to chain after this one.
    this(string content, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
        pure nothrow @nogc @safe
    {

        super(content, file, line, next);

    }

}