module hipddf.parser;
import hipddf.types;
import std.conv: to;


/// Every kind of token `getToken` can produce.
enum HipDDFTokenType
{
    assignment,
    comma,
    colon,
    semicolon,
    openParenthesis,
    closeParenthesis,
    openSquareBrackets,
    closeSquareBrackets,
    openCurlyBrackets,
    closeCurlyBrackets,
    endOfStream,
    symbol,
    stringLiteral,
    numberLiteral,
    unknown
}

/// A single lexed token: its text and its kind.
struct HipDDFToken
{
    string str;
    HipDDFTokenType type;

    /// Returns the token text followed by a label for its type in parentheses.
    string toString()
    {
        string T;
        swt: final switch(type)
        {
            //Generates one case per enum member
            static foreach(m; __traits(allMembers, HipDDFTokenType))
            {
                case __traits(getMember, HipDDFTokenType, m):
                    T = m.stringof;
                    break swt;
            }
        }
        return str~" ("~T~")";
    }
}

/// Cursor over the text being parsed, plus the object being filled.
struct HipDDFTokenizer
{
    string str;
    string filename;
    ulong pos;
    uint line;
    HipDDFObjectInternal* obj;

    /** Returns str[pos] */
    pragma(inline) @nogc nothrow @safe char get(){return str[pos];}
    /** Returns str[pos+1], used for not needing to access every time its members */
    pragma(inline) @nogc nothrow @safe char next(){return str[pos+1];}
    /** Returns str.length - pos */
    pragma(inline) @nogc nothrow @safe int restLength(){return cast(int)(str.length - pos);}


}

/**
*   Skips whitespace, `//` line comments and `/+ +/` or `/* *` `/` block comments,
*   incrementing `tokenizer.line` once for every '\n' consumed.
*   Never moves `tokenizer.pos` past `tokenizer.str.length`.
*/
nothrow @safe @nogc
private void advanceWhitespace(HipDDFTokenizer* tokenizer)
{
    while(tokenizer.restLength > 0)
    {
        if(isWhitespace(tokenizer.get))
        {
            if(tokenizer.get == '\n')
                tokenizer.line++;
            tokenizer.pos++;
        }
        else if(tokenizer.get == '/' && tokenizer.restLength > 1 && tokenizer.next == '/')
        {
            //Stop at the end of line (or end of input). The line break itself is
            //consumed, and counted, by the whitespace branch on the next iteration.
            while(tokenizer.restLength > 0 && !isEndOfLine(tokenizer.get))
                tokenizer.pos++;
        }
        else if(tokenizer.get == '/' && tokenizer.restLength > 1 && (tokenizer.next == '*' || tokenizer.next == '+'))
        {
            tokenizer.pos+= 2;
            while(tokenizer.restLength > 0 &&
                !((tokenizer.get == '*' || tokenizer.get == '+') && (tokenizer.restLength > 1 && tokenizer.next == '/')))
            {
                if(tokenizer.get == '\n')
                    tokenizer.line++;
                tokenizer.pos++;
            }
            //Skip the terminator only when it really exists; an unterminated
            //comment simply ends at the end of the input.
            if(tokenizer.restLength > 1)
                tokenizer.pos+= 2;
            else
                tokenizer.pos = tokenizer.str.length;
        }
        else
            break;
    }
}

/**
*   Reads the next token, skipping leading whitespace and comments.
*
*   Returns an `endOfStream` token once the input is exhausted. Characters that
*   start no known token produce an `unknown` token whose text is the character
*   code. `__LINE__` and `__FILE__` are replaced by the tokenizer's current line
*   and filename.
*   Note that `isNumeric` accepts '-', so both number literals and symbols may
*   contain that character.
*/
HipDDFToken getToken(HipDDFTokenizer* tokenizer)
{
    HipDDFToken ret;
    advanceWhitespace(tokenizer);
    if(tokenizer.pos >= tokenizer.str.length)
        return HipDDFToken("", HipDDFTokenType.endOfStream);
    char C = tokenizer.get;
    ulong start = tokenizer.pos;
    tokenizer.pos++;

    switch(C)
    {
        case '=': {ret.str = "=";ret.type = HipDDFTokenType.assignment; break;}
        case ',': {ret.str = ",";ret.type = HipDDFTokenType.comma; break;}
        case ';': {ret.str = ";";ret.type = HipDDFTokenType.semicolon; break;}
        case ':': {ret.str = ":";ret.type = HipDDFTokenType.colon; break;}
        case '(': {ret.str = "(";ret.type = HipDDFTokenType.openParenthesis; break;}
        case ')': {ret.str = ")";ret.type = HipDDFTokenType.closeParenthesis; break;}
        case '[': {ret.str = "[";ret.type = HipDDFTokenType.openSquareBrackets; break;}
        case ']': {ret.str = "]";ret.type = HipDDFTokenType.closeSquareBrackets; break;}
        case '{': {ret.str = "{";ret.type = HipDDFTokenType.openCurlyBrackets; break;}
        case '}': {ret.str = "}";ret.type = HipDDFTokenType.closeCurlyBrackets; break;}
        case '\0':{ret.str = "\0";ret.type = HipDDFTokenType.endOfStream; break;}
        case '"':
        {
            while(tokenizer.restLength > 0 && tokenizer.get != '"')
            {
                if(tokenizer.get == '\\') //Skip the escaped character
                    tokenizer.pos++;
                tokenizer.pos++;
            }
            //A trailing backslash may have pushed the cursor past the end
            if(tokenizer.pos > tokenizer.str.length)
                tokenizer.pos = tokenizer.str.length;
            ulong end = tokenizer.pos;
            if(tokenizer.restLength > 0)
                tokenizer.pos++; //Advance the closing '"' when there is one
            ret.str = tokenizer.str[start+1..end]; //Remove the ""
            ret.type = HipDDFTokenType.stringLiteral;
            break;
        }
        default:
            if(isNumeric(C)) //Check numeric literal
            {
                while(tokenizer.restLength > 0 && isNumeric(tokenizer.get))
                    tokenizer.pos++;
                ret.str = tokenizer.str[start..tokenizer.pos];
                ret.type = HipDDFTokenType.numberLiteral;
            }
            else if(isAlpha(C) || C == '_') //Check symbol
            {
                while(tokenizer.restLength > 0 &&
                    (tokenizer.get.isNumeric || tokenizer.get.isAlpha || tokenizer.get =='_'))
                    tokenizer.pos++;
                ret.str = tokenizer.str[start..tokenizer.pos];
                //I'll consider creating a function for that if it happens to have more special symbols
                if(ret.str == "__LINE__")
                {
                    ret.str = to!string(tokenizer.line);
                    ret.type = HipDDFTokenType.numberLiteral;
                }
                else if(ret.str == "__FILE__")
                {
                    ret.str = tokenizer.filename;
                    ret.type = HipDDFTokenType.stringLiteral;
                }
                else
                    ret.type = HipDDFTokenType.symbol;
            }
            else
            {
                ret.type = HipDDFTokenType.unknown;
                ret.str = ""~to!string((cast(int)C));
            }
    }

    return ret;
}

/**
*   This state must be used as a cyclic state for parsing it correctly.
*/
private enum HipDDFState
{
    type,
    symbol,
    assignment
}

/**
*   It must always find in the following order:
*   1: Type
*   2: Symbol
*   3: Assignment
*   4: Data
*   By following this order, the data format will be really simple to follow.
*
*   Params:
*       hdf = The HipDDF text to parse
*   Returns: The parsed object, holding every variable declared in `hdf`.
*/
HipDDFObject parseHipDDF(string hdf)
{
    HipDDFObjectInternal* obj = new HipDDFObjectInternal("");
    HipDDFTokenizer tokenizer;
    tokenizer.str = hdf;
    tokenizer.obj = obj;

    HipDDFState state = HipDDFState.type;
    HipDDFToken tk = getToken(&tokenizer);

    HipDDFVarInternal variable;
    bool gotAssignment;

    while(tk.type != HipDDFTokenType.endOfStream)
    {
        final switch(state)
        {
            case HipDDFState.type:
                //Ask for symbol to be used as a type
                tk = parseType(variable, tk, &tokenizer);
                state = HipDDFState.symbol;
                break;
            case HipDDFState.symbol: //No parsing should be required for the symbol.
                variable.symbol = tk.str;
                state = HipDDFState.assignment;
                //The token must be consumed outside of the assert: assert
                //expressions are not guaranteed to be evaluated in release builds.
                gotAssignment = requireToken(&tokenizer, HipDDFTokenType.assignment, tk);
                assert(gotAssignment, "Expected variable assignment after the symbol '"~variable.symbol~
                    "', received "~tk.toString);
                break;
            case HipDDFState.assignment:
                tk = parseAssignment(variable, tk, &tokenizer);
                obj.variables[variable.symbol] = variable;
                variable = HipDDFVarInternal.init;
                state = HipDDFState.type;
                break;
        }
    }
    return cast(HipDDFObject)obj;
}


/**
*   Parses the value that follows an assignment token and stores it in `variable`.
*
*   Accepts a string or number literal, the name of an already defined variable
*   (its value is copied), or a bracketed list. For lists, `variable.value` holds
*   the concatenated text between "[" and "]"; for plain arrays, `variable.length`
*   is set to the number of literals found.
*
*   Params:
*       variable = The variable receiving the value
*       token = Must be the assignment token
*       tokenizer = The tokenizer to read from
*   Returns: The next symbol token found after the value, or an endOfStream token.
*/
HipDDFToken parseAssignment(ref HipDDFVarInternal variable, HipDDFToken token, HipDDFTokenizer* tokenizer)
{
    assert(token.type == HipDDFTokenType.assignment, "Tried to parse a non assigment token: "~token.toString);
    for(;;)
    {
        token = getToken(tokenizer);
        switch(token.type)
        {
            case HipDDFTokenType.stringLiteral:
            case HipDDFTokenType.numberLiteral:
                variable.value = token.str;
                if(token.type == HipDDFTokenType.stringLiteral)
                    variable.length = cast(uint)token.str.length;
                token = findToken(tokenizer, HipDDFTokenType.symbol);
                return token;
            case HipDDFTokenType.symbol:
                assert((token.str in tokenizer.obj.variables) !is null,
                "Variable '"~token.str~"' is not defined at line "~to!string(tokenizer.line));
                variable.value = tokenizer.obj.variables[token.str].value;
                token = findToken(tokenizer, HipDDFTokenType.symbol);
                return token;
            case HipDDFTokenType.openSquareBrackets:
                variable.value = "[";
                token = getToken(tokenizer);
                if(variable.isAssociativeArray)
                {
                    while(token.type.isAssociativeArraySyntax)
                    {
                        variable.value~= token.str;
                        token = getToken(tokenizer);
                    }
                }
                else
                {
                    int arrayCount = 0;
                    while(token.type.isArraySyntax)
                    {
                        if(token.type.isLiteral)
                        {
                            variable.value~= token.str;
                            arrayCount++;
                        }
                        else if(token.type == HipDDFTokenType.comma)
                            variable.value~= ",";
                        token = getToken(tokenizer);
                    }
                    variable.length = arrayCount;
                }
                assert(token.type == HipDDFTokenType.closeSquareBrackets, "Expected ], but received "~token.toString~
                " on variable "~variable.symbol);
                variable.value~="]";
                token = findToken(tokenizer, HipDDFTokenType.symbol);

                return token;


            default: assert(0, "Unexpected token after assignment: "~token.toString);
        }
    }
    assert(0, "Unknown error occurred for token "~token.toString);
}

/**
*   The token passed is assumed to contain the initial type symbol.
*   It finishes parsing the type by checking whether it is a dynamic array (`T[]`),
*   a fixed-length array (`T[N]`) or an associative array (`T[K]`).
*
*   Params:
*       variable = The variable receiving the type information
*       token = Must be the symbol token that names the type
*       tokenizer = The tokenizer to read from
*   Returns: The symbol token holding the variable name.
*/
HipDDFToken parseType(ref HipDDFVarInternal variable, HipDDFToken token, HipDDFTokenizer* tokenizer)
{
    assert(token.type == HipDDFTokenType.symbol, "Tried to parse a non type token: "~token.toString);
    variable.type = token.str;
    //requireToken is always called outside of the asserts: assert expressions
    //are not guaranteed to be evaluated in release builds.
    bool matched;
    for(;;)
    {
        token = getToken(tokenizer);
        switch(token.type)
        {
            case HipDDFTokenType.openSquareBrackets:
                token = getToken(tokenizer);
                if(token.type == HipDDFTokenType.closeSquareBrackets)
                {
                    variable.type~= "[]";
                    variable.isArray = true;
                }
                else if(token.type == HipDDFTokenType.numberLiteral)
                {
                    variable.type~= "["~token.str;
                    variable.length = to!uint(token.str);
                    matched = requireToken(tokenizer, HipDDFTokenType.closeSquareBrackets, token);
                    assert(matched, "Expected ], received "~token.toString);
                    variable.type~="]";
                    variable.isArray = true;
                }
                else if(token.type == HipDDFTokenType.symbol)
                {
                    variable.type~= "["~token.str;
                    matched = requireToken(tokenizer, HipDDFTokenType.closeSquareBrackets, token);
                    assert(matched, "Expected ], received "~token.toString);
                    variable.type~="]";
                    variable.isAssociativeArray = true;
                }
                assert(token.type == HipDDFTokenType.closeSquareBrackets, "Expected ], received "~token.toString);
                matched = requireToken(tokenizer, HipDDFTokenType.symbol, token);
                assert(matched, "Expected a variable name, received "~token.toString);
                return token;
            case HipDDFTokenType.symbol:
                return token;
            default:
                assert(0, "Error occurred with token " ~ token.toString);
        }
    }
    assert(0, "Unknown error occurred: "~token.toString);
}


/**
*   Reads tokens until one of the given type, or the end of the stream, is found.
*   Every token in between is discarded.
*/
private HipDDFToken findToken(HipDDFTokenizer* tokenizer, HipDDFTokenType type)
{
    HipDDFToken tk;
    while(tokenizer.restLength > 0)
    {
        tk = getToken(tokenizer);
        if(tk.type == type || tk.type == HipDDFTokenType.endOfStream)
            return tk;
    }
    return HipDDFToken("", HipDDFTokenType.endOfStream);
}

/// True for number and string literal tokens.
pragma(inline) pure nothrow @safe @nogc bool isLiteral(HipDDFTokenType type)
{
    return type == HipDDFTokenType.numberLiteral || type == HipDDFTokenType.stringLiteral;
}
/**
*   Mainly a syntax creator.
*   Reads the next token into `token` and returns whether it has the expected type.
*   It consumes input, so it must not be called from inside an `assert`.
*/
private pragma(inline) bool requireToken(HipDDFTokenizer* tokenizer, HipDDFTokenType type, out HipDDFToken token)
{
    token = getToken(tokenizer);
    return token.type == type;
}

/// Parsed representation of a single HipDDF variable.
struct HipDDFVarInternal
{
    string type;
    string value;
    string symbol;
    bool isArray;
    bool isAssociativeArray;
    uint length;
    pure string toString() const {return type~" "~symbol~" = "~value;}
}

/// Parsed representation of a HipDDF document: its variables indexed by name.
struct HipDDFObjectInternal
{
    string symbol;
    string filename;
    HipDDFVarInternal[string] variables;
}


pragma(inline) bool isAlpha(char c) pure nothrow @safe @nogc{return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');}
pragma(inline) bool isEndOfLine(char c) pure nothrow @safe @nogc{return c == '\n' || c == '\r';}
/// Digits and the '-' character.
pragma(inline) bool isNumeric(char c) pure nothrow @safe @nogc{return (c >= '0' && c <= '9') || (c == '-');}
pragma(inline) bool isWhitespace(char c) pure nothrow @safe @nogc{return (c == ' ' || c == '\t' || c.isEndOfLine);}
/// Tokens allowed between the brackets of an associative array value.
pragma(inline) bool isAssociativeArraySyntax(HipDDFTokenType type) pure nothrow @safe @nogc
{
    return type.isLiteral || type == HipDDFTokenType.colon || type == HipDDFTokenType.comma;
}
/// Tokens allowed between the brackets of an array value.
pragma(inline) bool isArraySyntax(HipDDFTokenType type) pure nothrow @safe @nogc
{
    return type.isLiteral || type == HipDDFTokenType.comma;
}

pure
{
    //Var value
    string parserVarType(const(void*) hddfvar){return (cast(HipDDFVarInternal*)hddfvar).type;}
    string parserVarValue(const(void*) hddfvar){return (cast(HipDDFVarInternal*)hddfvar).value;}
    string parserVarSymbol(const(void*) hddfvar){return (cast(HipDDFVarInternal*)hddfvar).symbol;}
    bool parserIsVarArray(const(void*) hddfvar){return (cast(HipDDFVarInternal*)hddfvar).isArray;}
    uint parserVarLength(const(void*) hddfvar){return (cast(HipDDFVarInternal*)hddfvar).length;}

    string parserObjSymbol(const(void*) hddfobj){return (cast(HipDDFObjectInternal*)hddfobj).symbol;}

    //Object
    /// Returns true when the object contains a variable called `name`.
    bool parserObjHasVar(const(void*) hddfobj, string name)
    {
        auto obj = cast(HipDDFObjectInternal*)hddfobj;
        return (name in obj.variables) !is null;
    }
    /**
    *   Converts the variable called `name` to `T`.
    *
    *   The declared HipDDF type must be equal to `T.stringof`. Arrays and
    *   associative arrays are rebuilt from the bracketed text stored in the
    *   variable, splitting on ',' (and ':' for associative arrays); everything
    *   else is converted directly with `to!T`.
    *   Asserts when the variable does not exist or has a different type.
    */
    T parserObjGet(T)(const(void*)hddfobj, string name)
    {
        auto obj = cast(HipDDFObjectInternal*)hddfobj;
        HipDDFVarInternal* v = name in obj.variables;
        if(v !is null)
        {
            import std.traits:isArray, isStaticArray, isAssociativeArray, KeyType, ValueType;
            assert(v.type == T.stringof, "Data expected '"~T.stringof~"' differs from the HipDDF : '"~v.toString~"'");

            static if(!is(T == string) && isArray!T)
            {
                assert(v.isArray, "Tried to get an array of type "~T.stringof~" from HipDDF which is not an array: '"~v.toString~"'");
                T ret;
                string stringVal = "";
                int index = 0;
                //Means that the array has same value on every index
                if(v.value[$-1] != ']')
                {
                    static if(isStaticArray!T)
                        ret = to!(typeof(T.init[0]))(v.value);
                    else
                        assert(0, "Tried to assign a single value to a dynamic array");
                }
                //Parse the values
                else
                {
                    const int last = cast(int)v.value.length - 1;
                    //Index 0 is the '[' and index `last` is the ']', which is
                    //handled as the final separator so the last element is stored too.
                    for(int i = 1; i <= last; i++)
                    {
                        if(i == last || v.value[i] == ',')
                        {
                            if(stringVal.length > 0)
                            {
                                static if(!isStaticArray!T)
                                    ret.length++;
                                ret[index++] = to!(typeof(T.init[0]))(stringVal);
                            }
                            stringVal = "";
                        }
                        else
                            stringVal~= v.value[i];
                    }
                }
                return ret;
            }
            else static if(isAssociativeArray!T)
            {
                assert(v.isAssociativeArray, "Tried to get associative array from variable "~v.toString);
                int i = 1;
                string keyString = "";
                string valueString = "";
                bool isCheckingForKey = true;
                T ret;
                scope void insertAA()
                {
                    ret[to!(KeyType!T)(keyString)] = to!(ValueType!T)(valueString);
                    keyString = "";
                    valueString = "";
                }
                while(i < cast(int)v.value.length - 1)
                {
                    switch(v.value[i])
                    {
                        case ',':
                            isCheckingForKey = true;
                            insertAA();
                            break;
                        case ':':
                            isCheckingForKey = false;
                            break;
                        default:
                            if(isCheckingForKey)
                                keyString~=v.value[i];
                            else
                                valueString~=v.value[i];
                            break;
                    }
                    i++;
                }
                //Compare lengths: an empty (but non-null) string is still "true"
                if(keyString.length > 0 && valueString.length > 0)
                    insertAA();
                return ret;
            }
            else
                return to!T(v.value);
        }
        assert(0, "Could not find variable named '"~name~"'");
    }
}