1 module hipddf.parser;
2 import hipddf.types;
3 import std.conv: to;
4 
5 
/**
*   Classification attached to every HipDDFToken produced by getToken.
*   The comment next to each member shows the source text the member stands for.
*/
enum HipDDFTokenType
{
    assignment, /// '='
    comma, /// ','
    colon, /// ':'
    semicolon, /// ';'
    openParenthesis, /// '('
    closeParenthesis, /// ')' (going by the member name)
    openSquareBrackets, /// '['
    closeSquareBrackets, /// ']'
    openCurlyBrackets, /// '{'
    closeCurlyBrackets, /// '}'
    endOfStream, /// End of the input text, or a '\0' character found in it
    symbol, /// Identifier starting with a letter or '_'
    stringLiteral, /// Double quoted text; also what `__FILE__` is replaced with
    numberLiteral, /// Run of characters accepted by isNumeric; also what `__LINE__` is replaced with
    unknown /// Any character that matches none of the above
}
24 
/**
*   A single lexical token: the text it carries plus its classification.
*/
struct HipDDFToken
{
    /// Text of the token
    string str;
    /// Classification of the token
    HipDDFTokenType type;

    /**
    *   Formats the token as `text (memberName)`, where memberName is the name of
    *   the HipDDFTokenType member stored in `type`. Used to build error messages.
    */
    string toString()
    {
        string typeName;
        swt: final switch(type)
        {
            static foreach(m; __traits(allMembers, HipDDFTokenType))
            {
                case __traits(getMember, HipDDFTokenType, m):
                    //`m` already is the member name as a string. `m.stringof` would yield
                    //its quoted source form (e.g. `"comma"`, quotes included).
                    typeName = m;
                    break swt;
            }
        }
        return str~" ("~typeName~")";
    }
}
45 
/**
*   Cursor over the text being tokenized, shared by the tokenizing and parsing functions.
*/
struct HipDDFTokenizer
{
    /// Text being tokenized
    string str;
    /// Text that getToken substitutes for the `__FILE__` symbol
    string filename;
    /// Index in `str` of the next character to read
    ulong pos;
    /// Line counter advanced while skipping whitespace and comments; substituted for `__LINE__`
    uint line;
    /// Object being filled, consulted when a value refers to a previously stored variable
    HipDDFObjectInternal* obj;

    /** Character under the cursor: str[pos] */
    pragma(inline) @nogc nothrow @safe char get()
    {
        return str[pos];
    }

    /** Character right after the cursor: str[pos+1] */
    pragma(inline) @nogc nothrow @safe char next()
    {
        return str[pos+1];
    }

    /** Count of characters from the cursor to the end: str.length - pos, narrowed to int */
    pragma(inline) @nogc nothrow @safe int restLength()
    {
        const remaining = str.length - pos;
        return cast(int)remaining;
    }


}
63 
/**
*   Moves tokenizer.pos past whitespace and comments, counting every '\n' crossed
*   in tokenizer.line.
*
*   Recognized comments:
*   - `// ...` up to (not including) the end of the line or the end of the input
*   - `/* ... */` and `/+ ... +/`, closed only by the terminator matching the opener.
*     Comments do not nest. An unterminated comment runs up to the end of the input.
*
*   On return tokenizer.pos is never greater than tokenizer.str.length.
*/
nothrow @safe @nogc
private void advanceWhitespace(HipDDFTokenizer* tokenizer)
{
    while(tokenizer.restLength > 0)
    {
        const char current = tokenizer.get;
        if(isWhitespace(current))
        {
            if(current == '\n')
                tokenizer.line++;
            tokenizer.pos++;
        }
        else if(current == '/' && tokenizer.restLength > 1 && tokenizer.next == '/')
        {
            //Stop ON the line terminator: the whitespace branch consumes it and is the
            //only place that counts the line, so a commented line is counted once.
            //The length check covers a comment on the last line without a newline.
            while(tokenizer.restLength > 0 && !isEndOfLine(tokenizer.get))
                tokenizer.pos++;
        }
        else if(current == '/' && tokenizer.restLength > 1 && (tokenizer.next == '*' || tokenizer.next == '+'))
        {
            //The closing sequence must use the same character as the opening one
            const char closer = tokenizer.next;
            tokenizer.pos+= 2;
            while(tokenizer.restLength > 0)
            {
                if(tokenizer.get == closer && tokenizer.restLength > 1 && tokenizer.next == '/')
                {
                    tokenizer.pos+= 2;
                    break;
                }
                if(tokenizer.get == '\n')
                    tokenizer.line++;
                tokenizer.pos++;
            }
            //If no terminator was found, the loop above ended exactly at the end of the input
        }
        else
            break;
    }
}
97 
/**
*   Reads the next token from the tokenizer, after skipping whitespace and comments.
*
*   - Punctuation characters become their matching HipDDFTokenType.
*   - `"..."` becomes a stringLiteral holding the raw text between the quotes. A backslash
*     makes the following character part of the literal (so `\"` does not close it), but
*     escape sequences are not decoded. A literal missing its closing quote extends up to
*     the end of the input.
*   - A run of characters accepted by isNumeric becomes a numberLiteral.
*   - A letter or '_' followed by letters, '_' or isNumeric characters becomes a symbol,
*     except `__LINE__` (numberLiteral holding tokenizer.line) and `__FILE__`
*     (stringLiteral holding tokenizer.filename).
*   - Any other character becomes an unknown token whose text is the character code.
*
*   Returns: The token read, or an endOfStream token when the input is exhausted.
*   tokenizer.pos never ends beyond tokenizer.str.length.
*/
HipDDFToken getToken(HipDDFTokenizer* tokenizer)
{
    HipDDFToken ret;
    advanceWhitespace(tokenizer);
    //`>=` instead of `==`: never index the input when the cursor is not inside it
    if(tokenizer.pos >= tokenizer.str.length)
        return HipDDFToken("", HipDDFTokenType.endOfStream);
    char C = tokenizer.get;
    ulong start = tokenizer.pos;
    tokenizer.pos++;

    switch(C)
    {
        case '=':  {ret.str = "=";ret.type = HipDDFTokenType.assignment; break;}
        case ',':  {ret.str = ",";ret.type = HipDDFTokenType.comma; break;}
        case ';':  {ret.str = ";";ret.type = HipDDFTokenType.semicolon; break;}
        case ':':  {ret.str = ":";ret.type = HipDDFTokenType.colon; break;}
        case '(':  {ret.str = "(";ret.type = HipDDFTokenType.openParenthesis; break;}
        case ')':  {ret.str = ")";ret.type = HipDDFTokenType.closeParenthesis; break;}
        case '[':  {ret.str = "[";ret.type = HipDDFTokenType.openSquareBrackets; break;}
        case ']':  {ret.str = "]";ret.type = HipDDFTokenType.closeSquareBrackets; break;}
        case '{':  {ret.str = "{";ret.type = HipDDFTokenType.openCurlyBrackets; break;}
        case '}':  {ret.str = "}";ret.type = HipDDFTokenType.closeCurlyBrackets; break;}
        case '\0':{ret.str = "\0";ret.type = HipDDFTokenType.endOfStream; break;}
        case '"':

            while(tokenizer.restLength > 0 && tokenizer.get != '"')
            {
                //Skip the escaped character too, but only if there is one left to skip
                if(tokenizer.get == '\\' && tokenizer.restLength > 1)
                    tokenizer.pos++;
                tokenizer.pos++;
            }
            if(tokenizer.restLength > 0)
            {
                ret.str = tokenizer.str[start+1..tokenizer.pos]; //Remove the ""
                tokenizer.pos++; //Advance the '"'
            }
            else //Closing quote missing: take the rest of the input, leave the cursor at its end
                ret.str = tokenizer.str[start+1..$];
            ret.type = HipDDFTokenType.stringLiteral;
            break;
        default:
            if(isNumeric(C)) //Check numeric literal
            {
                //The length check makes a number at the very end of the input safe to read
                while(tokenizer.restLength > 0 && isNumeric(tokenizer.get))
                    tokenizer.pos++;
                ret.str = tokenizer.str[start..tokenizer.pos];
                ret.type = HipDDFTokenType.numberLiteral;
            }
            else if(isAlpha(C) || C == '_') //Check symbol
            {
                while(tokenizer.restLength > 0 &&
                (tokenizer.get.isNumeric || tokenizer.get.isAlpha || tokenizer.get =='_'))
                    tokenizer.pos++;
                ret.str = tokenizer.str[start..tokenizer.pos];
                //I'll consider creating a function for that if it happens to have more special symbols
                if(ret.str == "__LINE__")
                {
                    ret.str = to!string(tokenizer.line);
                    ret.type = HipDDFTokenType.numberLiteral;
                }
                else if(ret.str == "__FILE__")
                {
                    ret.str = tokenizer.filename;
                    ret.type = HipDDFTokenType.stringLiteral;
                }
                else
                    ret.type = HipDDFTokenType.symbol;
            }
            else
            {
                //The text of an unknown token is the decimal code of the character
                ret.type = HipDDFTokenType.unknown;
                ret.str = ""~to!string((cast(int)C));
            }


    }

    return ret;
}
171 
/**
*   State of the parseHipDDF loop. The states are visited cyclically, in the order
*   type -> symbol -> assignment -> type ..., one full cycle per variable.
*/
private enum HipDDFState
{
    type, /// The current token is handed to parseType as the type of a variable
    symbol, /// The current token is taken as the variable name; an assignment token is required next
    assignment /// The current token is handed to parseAssignment and the variable is stored
}
181 
/**
*   Parses HipDDF source text into an object holding one entry per declared variable.
*
*   It must always find in the following order:
*   1: Type
*   2: Symbol
*   3: Assignment
*   4: Data
*   By following this order, the data format will be really simple to follow.
*
*   The tokenizer filename is not set here, so `__FILE__` is replaced by an empty string.
*
*   Params:
*       hdf = The HipDDF source text
*   Returns: The object filled with every parsed variable, keyed by variable name.
*   A later declaration replaces an earlier one with the same name.
*/
HipDDFObject parseHipDDF(string hdf)
{
    HipDDFObjectInternal* obj = new HipDDFObjectInternal("");
    HipDDFTokenizer tokenizer;
    tokenizer.str = hdf;
    tokenizer.obj = obj;

    HipDDFState state = HipDDFState.type;
    HipDDFToken tk = getToken(&tokenizer);

    HipDDFVarInternal variable;

    while(tk.type != HipDDFTokenType.endOfStream)
    {
        final switch(state)
        {
            case HipDDFState.type:
                //Ask for symbol to be used as a type
                tk = parseType(variable, tk, &tokenizer);
                state = HipDDFState.symbol;
                break;
            case HipDDFState.symbol: //No parsing should be required for the symbol.
            {
                variable.symbol = tk.str;
                state = HipDDFState.assignment;
                //The token must be read outside of the assert: asserts are compiled out
                //in release builds, which would leave the token unread.
                const bool foundAssignment = requireToken(&tokenizer, HipDDFTokenType.assignment, tk);
                //At this point `tk` holds the token that was read, not the symbol anymore
                assert(foundAssignment, "Expected variable assignment after the symbol '"~variable.symbol~
                "', received "~tk.toString);
                break;
            }
            case HipDDFState.assignment:
                tk = parseAssignment(variable, tk, &tokenizer);
                obj.variables[variable.symbol] = variable;
                //Start the next declaration from a clean variable
                variable = HipDDFVarInternal.init;
                state = HipDDFState.type;
                break;
        }
    }
    return cast(HipDDFObject)obj;
}
229 
230 
/**
*   Parses the value that follows an assignment token and stores it in `variable`.
*
*   Accepted values:
*   - A string or number literal: its text becomes the value. A string literal also
*     sets variable.length to the length of its text.
*   - A symbol: it must name a variable already stored in tokenizer.obj, whose value is copied.
*   - `[ ... ]`: literals separated by ',' (and by ':' when variable.isAssociativeArray).
*     The value becomes the text of those tokens put back between '[' and ']'. For
*     non associative arrays, variable.length becomes the number of literals.
*
*   Params:
*       variable = Variable receiving the value
*       token = Must be the assignment token
*       tokenizer = Where the following tokens are read from
*   Returns: The first symbol token found after the value (everything before it is
*   skipped), or an endOfStream token when there is none.
*/
HipDDFToken parseAssignment(ref HipDDFVarInternal variable, HipDDFToken token, HipDDFTokenizer* tokenizer)
{
    assert(token.type == HipDDFTokenType.assignment, "Tried to parse a non assigment token: "~token.toString);

    //Every branch below either returns or fails, so the dispatch needs no loop around it
    token = getToken(tokenizer);

    if(token.type.isLiteral)
    {
        variable.value = token.str;
        if(token.type == HipDDFTokenType.stringLiteral)
            variable.length = cast(uint)token.str.length;
        return findToken(tokenizer, HipDDFTokenType.symbol);
    }

    if(token.type == HipDDFTokenType.symbol)
    {
        assert((token.str in tokenizer.obj.variables) !is null, 
        "Variable '"~token.str~"' is not defined at line "~to!string(tokenizer.line));
        variable.value = tokenizer.obj.variables[token.str].value;
        return findToken(tokenizer, HipDDFTokenType.symbol);
    }

    if(token.type != HipDDFTokenType.openSquareBrackets)
        assert(0,  "Unexpected token after assignment: "~token.toString);

    variable.value = "[";
    token = getToken(tokenizer);
    if(variable.isAssociativeArray)
    {
        while(token.type.isAssociativeArraySyntax)
        {
            variable.value~= token.str;
            token = getToken(tokenizer);
        }
    }
    else
    {
        int arrayCount = 0;
        while(token.type.isArraySyntax)
        {
            if(token.type.isLiteral)
            {
                variable.value~= token.str;
                arrayCount++;
            }
            else if(token.type == HipDDFTokenType.comma)
                variable.value~= ",";
            token = getToken(tokenizer);
        }
        variable.length = arrayCount;
    }
    assert(token.type == HipDDFTokenType.closeSquareBrackets, "Expected ], but received "~token.toString~
    " on variable "~variable.symbol);
    variable.value~="]";
    return findToken(tokenizer, HipDDFTokenType.symbol);
}
292 
/**
*   Parses the type of a declaration and stores it in `variable`.
*
*   The token passed is assumed to contain the initial type symbol. What follows it
*   decides the kind of variable:
*   - a symbol: plain type, nothing else is recorded
*   - `[]`: array (variable.isArray)
*   - `[number]`: array with variable.length set to that number (variable.isArray)
*   - `[symbol]`: associative array (variable.isAssociativeArray)
*   The bracket part is appended to variable.type, e.g. "int[]", "int[4]", "int[string]".
*
*   Params:
*       variable = Variable receiving the type information
*       token = Must be the symbol token naming the type
*       tokenizer = Where the following tokens are read from
*   Returns: The symbol token that follows the type, i.e. the variable name
*/
HipDDFToken parseType(ref HipDDFVarInternal variable, HipDDFToken token, HipDDFTokenizer* tokenizer)
{
    assert(token.type == HipDDFTokenType.symbol, "Tried to parse a non type token: "~token.toString);
    variable.type = token.str;

    //Every branch below either returns or fails, so the dispatch needs no loop around it
    token = getToken(tokenizer);
    if(token.type == HipDDFTokenType.symbol)
        return token;
    if(token.type != HipDDFTokenType.openSquareBrackets)
        assert(0, "Error occurred with token " ~ token.toString);

    //NOTE: requireToken is always called outside of the asserts. Asserts are compiled
    //out in release builds, which would leave the tokens unread.
    token = getToken(tokenizer);
    if(token.type == HipDDFTokenType.closeSquareBrackets)
    {
        variable.type~= "[]";
        variable.isArray = true;
    }
    else if(token.type == HipDDFTokenType.numberLiteral)
    {
        variable.type~= "["~token.str;
        variable.length = to!uint(token.str);
        const bool closed = requireToken(tokenizer, HipDDFTokenType.closeSquareBrackets, token);
        assert(closed, "Expected ], received "~token.toString);
        variable.type~="]";
        variable.isArray = true;
    }
    else if(token.type == HipDDFTokenType.symbol)
    {
        variable.type~= "["~token.str;
        const bool closed = requireToken(tokenizer, HipDDFTokenType.closeSquareBrackets, token);
        assert(closed, "Expected ], received "~token.toString);
        variable.type~="]";
        variable.isAssociativeArray = true;
    }
    //Also catches anything else found right after the '['
    assert(token.type == HipDDFTokenType.closeSquareBrackets, "Expected ], received "~token.toString);
    const bool foundName = requireToken(tokenizer, HipDDFTokenType.symbol, token);
    assert(foundName, "Expected a variable name, received "~token.toString);
    return token;
}
339 
340 
/**
*   Reads tokens until one of the requested type shows up, discarding the others.
*
*   Returns: The first token of the requested type, or an endOfStream token when the
*   input finishes before one is found.
*/
private HipDDFToken findToken(HipDDFTokenizer* tokenizer, HipDDFTokenType type)
{
    while(tokenizer.restLength > 0)
    {
        HipDDFToken candidate = getToken(tokenizer);
        const bool isWanted = candidate.type == type;
        const bool isFinished = candidate.type == HipDDFTokenType.endOfStream;
        if(isWanted || isFinished)
            return candidate;
    }
    //Nothing left to read
    return HipDDFToken("", HipDDFTokenType.endOfStream);
}
352 
/// True when the token type is a number literal or a string literal
pragma(inline) pure nothrow @safe @nogc bool isLiteral(HipDDFTokenType type)
{
    switch(type)
    {
        case HipDDFTokenType.numberLiteral, HipDDFTokenType.stringLiteral:
            return true;
        default:
            return false;
    }
}
/**
*   Reads the next token into `token` and tells whether it has the expected type.
*   The token is consumed in both cases.
*/
private pragma(inline) bool requireToken(HipDDFTokenizer* tokenizer, HipDDFTokenType type, out HipDDFToken token)
{
    token = getToken(tokenizer);
    return token.type == type;
}
367 
/**
*   Parser-side description of one variable.
*/
struct HipDDFVarInternal
{
    /// Type as text, including any bracket part (e.g. "int[]")
    string type;
    /// Value as text
    string value;
    /// Variable name
    string symbol;
    /// Set by parseType for `T[]` and `T[number]` types
    bool isArray;
    /// Set by parseType for `T[symbol]` types
    bool isAssociativeArray;
    /// Length recorded by the parser (array size, literal count or string literal length)
    uint length;

    /// Renders the variable as `type symbol = value`
    pure string toString() const
    {
        const string declaration = type~" "~symbol;
        return declaration~" = "~value;
    }
}
378 
/**
*   Parser-side storage of a parsed document. parseHipDDF allocates one, fills
*   `variables` and returns it cast to HipDDFObject.
*/
struct HipDDFObjectInternal
{
    /// Object symbol; parseHipDDF constructs the object with "" here
    string symbol;
    /// NOTE(review): never assigned in the code visible here; presumably the source file name, confirm
    string filename;
    /// Parsed variables, keyed by their symbol
    HipDDFVarInternal[string] variables;
}
385 
386 
/// True for the ASCII letters a-z and A-Z
pragma(inline) bool isAlpha(char c) pure nothrow @safe @nogc
{
    const bool isLower = 'a' <= c && c <= 'z';
    const bool isUpper = 'A' <= c && c <= 'Z';
    return isLower || isUpper;
}
/// True for the line terminator characters '\n' and '\r'
pragma(inline) bool isEndOfLine(char c) pure nothrow @safe @nogc
{
    switch(c)
    {
        case '\n', '\r':
            return true;
        default:
            return false;
    }
}
/// True for the digits 0-9 and for the '-' sign
pragma(inline) bool isNumeric(char c) pure nothrow @safe @nogc
{
    if(c == '-')
        return true;
    return '0' <= c && c <= '9';
}
/// True for space, tab and the line terminator characters '\n' and '\r'
pragma(inline) bool isWhitespace(char c) pure nothrow @safe @nogc
{
    switch(c)
    {
        case ' ', '\t', '\n', '\r':
            return true;
        default:
            return false;
    }
}
/// True for the token types allowed between the brackets of an associative array: literals, ':' and ','
pragma(inline) bool isAssociativeArraySyntax(HipDDFTokenType type) pure nothrow @safe @nogc
{
    if(type == HipDDFTokenType.colon || type == HipDDFTokenType.comma)
        return true;
    return type.isLiteral;
}
/// True for the token types allowed between the brackets of an array: literals and ','
pragma(inline) bool isArraySyntax(HipDDFTokenType type) pure nothrow @safe @nogc
{
    if(type == HipDDFTokenType.comma)
        return true;
    return type.isLiteral;
}
399 
400 pure
401 {
402     //Var value
403     string parserVarType(const(void*) hddfvar){return (cast(HipDDFVarInternal*)hddfvar).type;}
404     string parserVarValue(const(void*) hddfvar){return (cast(HipDDFVarInternal*)hddfvar).value;}
405     string parserVarSymbol(const(void*) hddfvar){return (cast(HipDDFVarInternal*)hddfvar).symbol;}
406     bool parserIsVarArray(const(void*) hddfvar){return (cast(HipDDFVarInternal*)hddfvar).isArray;}
407     uint parserVarLength(const(void*) hddfvar){return (cast(HipDDFVarInternal*)hddfvar).length;}
408 
409     string parserObjSymbol(const(void*) hddfobj){return (cast(HipDDFObjectInternal*)hddfobj).symbol;}
410 
411     //Object
412     bool parserObjHasVar(const(void*) hddfobj, string name)
413     {
414         auto obj = cast(HipDDFObjectInternal*)hddfobj;
415         return (name in obj.variables) is null;
416     }
417     T parserObjGet(T)(const(void*)hddfobj, string name)
418     {
419         auto obj = cast(HipDDFObjectInternal*)hddfobj;
420         HipDDFVarInternal* v = name in obj.variables;
421         if(v !is null)
422         {
423             import std.traits:isArray, isStaticArray, isAssociativeArray, KeyType, ValueType;
424             assert(v.type == T.stringof, "Data expected '"~T.stringof~"' differs from the HipDDF : '"~v.toString~"'");
425 
426             static if(!is(T == string) && isArray!T)
427             {
428                 assert(v.isArray,  "Tried to get an array of type "~T.stringof~" from HipDDF which is not an array: '"~v.toString~"'");
429                 T ret;
430                 string stringVal = "";
431                 int i = 1;
432                 int index = 0;
433                 //Means that the array has same value on every index
434                 if(v.value[$-1] != ']')
435                 {
436                     static if(isStaticArray!T)
437                         ret = to!(typeof(T.init[0]))(v.value);
438                     else
439                         assert(0, "Tried to assign a single value to a dynamic array");
440                 }
441                 //Parse the values  
442                 else while(i < cast(int)v.value.length - 1)
443                 {
444                     if(v.value[i] == ',')
445                     {
446                         if(stringVal)
447                         {
448                             static if(!isStaticArray!T)
449                                 ret.length++;
450                             ret[index++] = to!(typeof(T.init[0]))(stringVal);
451                         }
452                         stringVal = "";
453                     }
454                     i++;
455                 }
456                 return ret;
457             }
458             else static if(isAssociativeArray!T)
459             {
460                 assert(v.isAssociativeArray, "Tried to get associative array from variable "~v.toString);
461                 int i = 1;
462                 string keyString = "";
463                 string valueString = "";
464                 bool isCheckingForKey = true;
465                 T ret;
466                 scope void insertAA()
467                 {
468                     ret[to!(KeyType!T)(keyString)] = to!(ValueType!T)(valueString);
469                     keyString = "";
470                     valueString = "";
471                 }
472                 while(i < cast(int)v.value.length - 1)
473                 {
474                     switch(v.value[i])
475                     {
476                         case ',':
477                             isCheckingForKey = true;
478                             insertAA();
479                             break;
480                         case ':':
481                             isCheckingForKey = false;
482                             break;
483                         default:
484                             if(isCheckingForKey)
485                                 keyString~=v.value[i];
486                             else
487                                 valueString~=v.value[i];
488                             break;
489                     }
490                     i++;
491                 }
492                 if(keyString && valueString)
493                     insertAA();
494                 return ret;
495             }
496             else
497                 return to!T(v.value);
498         }
499         assert(0, "Could not find variable named '"~name~"'");
500     }
501 }