1 /**
2  * Tokens as separate entities
3  *
4  * Copyright: © 2017 Andrey Kabylin
5  * License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6  */
7 
8 module rpdl.token;
9 
10 import std.ascii;
11 import std.uni : toLower;
12 import std.algorithm.iteration : map;
13 import std.algorithm.searching;
14 import std.conv;
15 
16 import rpdl.stream;
17 import rpdl.lexer : LexerError;
18 
/// Base class of all tokens; records where in the stream the token started.
class Token {
public:
    /// Kinds of tokens reported through `code`
    enum Code {
        none,  /// default value; `SymbolToken` leaves the code at this value
        id,  /// `Token` is `IdToken`, the text is available through `identifier`
        number,  /// `Token` is `NumberToken`, the value is available through `number`
        string,  /// `Token` is `StringToken`, the text is available through `str` and `utfStr`
        boolean,  /// boolean literal, the value is available through `boolean`
        include,  /// include keyword
    }

    /// Binds the token to `stream` and captures the stream's current
    /// indent, line and position.
    this(SymbolStream stream) {
        this.stream = stream;
        p_indent = stream.indent;
        p_line = stream.line;
        p_pos = stream.pos;
    }

    @property {
        /// Text of an identifier token
        string identifier() { return p_identifier; }

        /// Value of a number token
        float number() { return p_number; }

        /// Value of a boolean token
        bool boolean() { return p_boolean; }

        /// Content of a string token
        string str() { return p_string; }

        /// Content of a string token converted to `dstring`
        dstring utfStr() { return p_utfstring; }

        /// Kind of this token
        Code code() { return p_code; }

        /// Stream indent captured when the token was created
        int indent() { return p_indent; }

        /// Symbol of a symbol token
        char symbol() { return p_symbol; }

        /// Stream line captured when the token was created
        int line() { return p_line; }

        /// Stream position captured when the token was created
        int pos() { return p_pos; }
    }

protected:
    SymbolStream stream;  /// source of symbols, used by subclasses while lexing
    char p_symbol;

    // Values filled in by the concrete token classes
    string p_identifier;
    float p_number;
    bool p_boolean;
    string p_string;
    dstring p_utfstring;
    Code p_code;

    // Location captured in the constructor
    int p_indent;
    int p_line;
    int p_pos;
}
65 
/// Single special symbol such as '=', '+', '%' etc.
class SymbolToken : Token {
    /// Creates a token for `symbol`; the location is taken from `stream`
    /// by the base constructor. The token code is left untouched.
    this(SymbolStream stream, in char symbol) {
        super(stream);
        p_symbol = symbol;
    }
}
73 
/// String value - this token is created when the current symbol is $(GREEN ")
class StringToken : Token {
    /**
     * Lexes a string literal from `stream`.
     *
     * NOTE(review): assumes the stream's last symbol is the opening quote -
     * confirm against the lexer that constructs this token.
     *
     * Throws: `LexerError` on end of file inside the literal, on an unknown
     * escape sequence or on a bad `\uXXXX` sequence.
     */
    this(SymbolStream stream) {
        super(stream);
        this.lex();
    }

private:
    /// Reads symbols up to the closing quote, decoding escape sequences,
    /// and stores the result in `p_string` and `p_utfstring`.
    void lex() {
        do {
            stream.read();

            // `lexEscape` leaves the stream on the symbol following the escape.
            // That symbol can start another escape, so keep decoding until a
            // plain symbol shows up; otherwise the backslash of the second
            // escape would be appended to the string as is.
            while (stream.lastChar == '\\') {
                lexEscape();

                // Never spin on a stale backslash once the input is exhausted.
                if (stream.eof)
                    break;
            }

            if (stream.lastChar != '\"')
                p_string ~= stream.lastChar;
        } while (stream.lastChar != '\"' && !stream.eof);

        if (stream.eof)
            throw new LexerError(stream.line, stream.pos, "unexpected end of file");
        else stream.read();  // step over the closing quote

        p_code = Code.string;
        p_utfstring = to!dstring(p_string);
    }

    /// Decodes the escape sequence following a backslash, appends the decoded
    /// symbol to `p_string` and moves the stream to the symbol after the escape.
    void lexEscape() {
        stream.read();

        switch (stream.lastChar) {
            case 'n' : p_string ~= "\n"; break;
            case 'r' : p_string ~= "\r"; break;
            case '\\': p_string ~= "\\"; break;
            case '\"': p_string ~= "\""; break;
            case 'u': p_string ~= readUnicode(); break;
            default:
                auto message = "undefined escape sequence \\" ~ stream.lastChar;
                throw new LexerError(stream.line, stream.pos, message);
        }

        stream.read();
    }

    /// Reads the four hexadecimal digits of a `\uXXXX` escape and returns the
    /// code point they denote.
    dchar readUnicode() {
        import std.utf : isValidDchar;

        enum digitsCount = 4;
        string digits = "";

        foreach (_; 0 .. digitsCount) {
            stream.read();

            if (stream.eof || !isHexDigit(stream.lastChar))
                throw new LexerError(stream.line, stream.pos, "bad unicode");

            digits ~= stream.lastChar;
        }

        immutable codePoint = cast(dchar) digits.to!uint(16);

        // Surrogates (D800-DFFF) are not characters on their own; report them
        // here rather than putting an invalid sequence into the string.
        if (!isValidDchar(codePoint))
            throw new LexerError(stream.line, stream.pos, "bad unicode");

        return codePoint;
    }
}
134 
/// Number Float or Integer $(GREEN [0-9]+ (.[0-9]+)?)
class NumberToken : Token {
    /**
     * Lexes a number starting at the stream's last symbol.
     *
     * Params:
     *     stream = source of symbols
     *     negative = when `true`, the parsed value is negated
     *
     * Throws: `LexerError` if the collected symbols do not form a number.
     */
    this(SymbolStream stream, in bool negative = false) {
        super(stream);
        this.negative = negative;
        lex();
    }

private:
    bool negative = false;

    /// Whether the stream's last symbol can be part of a number.
    bool isNumberChar() {
        return isDigit(stream.lastChar) || stream.lastChar == '.';
    }

    /// Collects digits and at most one dot, then converts them to `p_number`.
    void lex() {
        p_code = Code.number;
        string literal = negative ? "-" : "";
        bool seenDot = false;

        while (isNumberChar()) {
            if (stream.lastChar == '.') {
                // A second dot is not part of this number - leave it in the stream.
                if (seenDot)
                    break;

                seenDot = true;
            }

            literal ~= stream.lastChar;
            stream.read();
        }

        // Text such as "-" or "." cannot be converted; report it as a lexer
        // error with a location instead of leaking a conversion exception.
        try {
            p_number = to!float(literal);
        } catch (ConvException e) {
            throw new LexerError(stream.line, stream.pos, "bad number \"" ~ literal ~ "\"");
        }
    }
}
170 
/// Identifier $(GREEN [a-zA-Z_][a-zA-Z0-9_]*)
class IdToken : Token {
    /**
     * Lexes an identifier starting at the stream's last symbol.
     *
     * The resulting `code` is `Code.include` for `include`, `Code.boolean`
     * for `true` / `false` (with `boolean` set accordingly) and `Code.id`
     * for any other text.
     */
    this(SymbolStream stream) {
        super(stream);
        lex();
    }

private:
    /// Whether the stream's last symbol can be part of an identifier.
    /// Digits are accepted here too; presumably the lexer only creates this
    /// token on a letter or underscore - verify against the caller.
    bool isIdChar() {
        return isAlphaNum(stream.lastChar) || stream.lastChar == '_';
    }

    /// Collects the identifier symbols and classifies keywords.
    void lex() {
        while (isIdChar()) {
            p_identifier ~= stream.lastChar;
            stream.read();
        }

        switch (identifier) {
            case "include":
                p_code = Code.include;
                return;

            case "true":
                p_code = Code.boolean;
                p_boolean = true;
                return;

            case "false":
                p_code = Code.boolean;
                p_boolean = false;
                return;

            default:
                p_code = Code.id;
        }
    }
}