/**
 * Tokens as separate entities
 *
 * Copyright: © 2017 Andrey Kabylin
 * License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
 */

module rpdl.token;

import std.ascii;
import std.uni : toLower;
import std.algorithm.iteration : map;
import std.algorithm.searching;
import std.conv;

import rpdl.stream;
import rpdl.lexer : LexerError;

/// Base token: stores where the token was found in the stream and the
/// value slots that the concrete token classes fill in.
class Token {
public:
    /// Available tokens
    enum Code {
        none,     /// No specific code assigned (first member, hence the default)
        id,       /// `Token` is `IdToken` and has `identifier` property
        number,   /// `Token` is `NumberToken` and has `number` property
        string,   /// `Token` is `StringToken` and has `str` and `utfStr` properties
        boolean,  /// `Token` has `boolean` property
        include,  /// include keyword
    }

    /// Remembers `stream` for later lexing and records the indent, line and
    /// position the stream reports at construction time.
    this(SymbolStream stream) {
        p_pos = stream.pos;
        p_line = stream.line;
        p_indent = stream.indent;
        this.stream = stream;
    }

    // Read-only accessors over the protected storage below.
    @property {
        string identifier() {
            return p_identifier;
        }

        float number() {
            return p_number;
        }

        bool boolean() {
            return p_boolean;
        }

        string str() {
            return p_string;
        }

        dstring utfStr() {
            return p_utfstring;
        }

        Code code() {
            return p_code;
        }

        int indent() {
            return p_indent;
        }

        char symbol() {
            return p_symbol;
        }

        int line() {
            return p_line;
        }

        int pos() {
            return p_pos;
        }
    }

protected:
    SymbolStream stream;  // source of characters, read by the subclasses
    char p_symbol;        // backing field of `symbol`

    // values
    string p_identifier;
    float p_number;
    bool p_boolean;
    string p_string;
    dstring p_utfstring;
    Code p_code;

    // location captured in the constructor
    int p_indent;
    int p_line;
    int p_pos;
}

/// Special symbol like '=', '+', '%' etc.
class SymbolToken : Token {
    /// Creates a token that carries the single character `symbol`.
    this(SymbolStream stream, in char symbol) {
        super(stream);
        this.p_symbol = symbol;
    }
}

/// String value - get this token if symbol start with $(GREEN ")
class StringToken : Token {
    /// Lexes the string literal from `stream` immediately.
    ///
    /// Throws: `LexerError` on an unterminated string, an unknown escape
    /// sequence or a malformed `\uXXXX` sequence.
    this(SymbolStream stream) {
        super(stream);
        this.lex();
    }

private:
    /// Reads characters up to the closing quote, translating escape
    /// sequences, then stores the result in `p_string` / `p_utfstring`.
    void lex() {
        do {
            stream.read();

            // Loop rather than a single check: `lexEscape` leaves the
            // character that follows the escape in `lastChar`, and that
            // character may itself start another escape (e.g. `\n\n`).
            // The eof guard keeps the loop from spinning at end of input.
            while (stream.lastChar == '\\' && !stream.eof)
                lexEscape();

            if (stream.lastChar != '\"')
                p_string ~= stream.lastChar;
        } while (stream.lastChar != '\"' && !stream.eof);

        if (stream.eof)
            throw new LexerError(stream.line, stream.pos, "unexpected end of file");
        else
            stream.read();  // step past the closing quote

        p_code = Code.string;
        p_utfstring = to!dstring(p_string);
    }

    /// Handles one escape sequence; on entry `lastChar` is the backslash.
    /// On exit `lastChar` is the first character after the sequence.
    void lexEscape() {
        stream.read();

        switch (stream.lastChar) {
            case 'n' : p_string ~= "\n"; break;
            case 'r' : p_string ~= "\r"; break;
            case '\\': p_string ~= "\\"; break;
            case '\"': p_string ~= "\""; break;
            case 'u': p_string ~= readUnicode(); break;
            default:
                auto message = "undefined escape sequence \\" ~ stream.lastChar;
                throw new LexerError(stream.line, stream.pos, message);
        }

        stream.read();
    }

    /// Reads exactly four hexadecimal digits and returns the code point
    /// they denote.
    ///
    /// Throws: `LexerError` if a non-hex character or end of file is hit.
    dchar readUnicode() {
        enum hexChars = "0123456789abcdefABCDEF";
        string unicode = "";

        for (int i = 0; i < 4; ++i) {
            stream.read();

            if (!hexChars.canFind(stream.lastChar) || stream.eof)
                throw new LexerError(stream.line, stream.pos, "bad unicode");

            unicode ~= stream.lastChar;
        }

        return unicode.to!ulong(16).to!dchar;
    }
}

/// Number Float or Integer $(GREEN [0-9]+ (.[0-9]+)?)
class NumberToken : Token {
    /// Lexes a number starting at the stream's current character.
    ///
    /// Params:
    ///     stream   = character source
    ///     negative = when `true`, the parsed value gets a leading minus sign
    this(SymbolStream stream, in bool negative = false) {
        super(stream);
        this.negative = negative;
        lex();
    }

private:
    bool negative = false;

    /// True while the current character can belong to a number literal.
    bool isNumberChar() {
        return isDigit(stream.lastChar) || stream.lastChar == '.';
    }

    /// Collects digits and at most one '.', then converts the text to
    /// `float` and stores it in `p_number`.
    void lex() {
        string numStr = negative ? "-" : "";
        p_code = Code.number;
        bool seenDot = false;

        while (isNumberChar()) {
            if (stream.lastChar == '.') {
                // A second '.' ends the literal; it is left in the stream.
                if (seenDot)
                    break;

                seenDot = true;
            }

            numStr ~= stream.lastChar;
            stream.read();
        }

        // NOTE(review): `to!float` presumably throws `ConvException` (not
        // `LexerError`) when no digit was collected, e.g. `numStr` is "" or
        // "-" — confirm the lexer only builds this token on a digit.
        p_number = to!float(numStr);
    }
}

/// Identifier $(GREEN [a-zA-Z_][a-zA-Z0-9_]*)
class IdToken : Token {
    /// Lexes an identifier starting at the stream's current character and
    /// classifies it as `include`, a boolean literal or a plain id.
    this(SymbolStream stream) {
        super(stream);
        p_code = Code.id;
        lex();
    }

private:
    /// True while the current character can belong to an identifier.
    bool isIdChar() {
        return isAlphaNum(stream.lastChar) || stream.lastChar == '_';
    }

    /// Accumulates identifier characters into `p_identifier`, then maps
    /// the keywords `include`, `true` and `false` to their token codes.
    void lex() {
        while (isIdChar()) {
            p_identifier ~= stream.lastChar;
            stream.read();
        }

        switch (identifier) {
            case "include":
                p_code = Code.include;
                return;

            case "true":
                p_code = Code.boolean;
                p_boolean = true;
                return;

            case "false":
                p_code = Code.boolean;
                p_boolean = false;
                return;

            default:
                p_code = Code.id;
        }
    }
}