BAM
Abstract Machine for Bottom-Up Evaluation with the Push Method
lex.h
Go to the documentation of this file.
1 // ============================================================================
2 // Project: Deductive Database
3 // Filename: lex.h
4 // Purpose: Lexical Scanner
5 // Last Change: 04.08.2017
6 // Language: C++
7 // EMail: brass@informatik.uni-halle.de
8 // WWW: http://www.informatik.uni-halle.de/~brass/
9 // Address: Feldschloesschen 15, D-06120 Halle (Saale), GERMANY
10 // Copyright: (c) 1997-2017 by Stefan Brass
11 // License: See file "LICENSE" for copying conditions.
12 // Note: There is no warranty at all - this code may contain bugs.
13 // ============================================================================
14 
15 
22 //=============================================================================
23 // Include File Frame:
24 //=============================================================================
25 
26 #ifndef LEX_INCLUDED
27 #define LEX_INCLUDED
28 
29 //=============================================================================
30 // Used Types and Macros:
31 //=============================================================================
32 
33 #ifndef VER_INCLUDED
34 #include "../base/ver.h"
35 #endif
36 
37 #ifndef STR_INCLUDED
38 #include "../base/str.h"
39 #endif
40 
41 #ifndef CHECK_INCLUDED
42 #include "../base/check.h"
43 #endif
44 
45 #ifndef IN_INCLUDED
46 #include "../input/in.h"
47 #endif
48 
49 #ifndef TOK_INCLUDED
50 #include "tok.h"
51 #endif
52 
53 #ifndef SYN_INCLUDED
54 #include "syn.h"
55 #endif
56 
57 //=============================================================================
58 // Private Constants:
59 //=============================================================================
60 
61 //-----------------------------------------------------------------------------
62 // LEX_LINESIZE: Maximal number of characters in an input line.
63 //-----------------------------------------------------------------------------
64 
65 static const int LEX_LINESIZE = 4096;
66 
67 //-----------------------------------------------------------------------------
68 // LEX_BLOCKSIZE: Number of input characters to read in one chunk.
69 //-----------------------------------------------------------------------------
70 
71 static const int LEX_BLOCKSIZE = 8192;
72 
73 //-----------------------------------------------------------------------------
74 // LEX_MAX_INT: Maximal positive value for integer tokens.
75 //-----------------------------------------------------------------------------
76 
77 static const int LEX_MAX_INT = 65535;
78 
79 //-----------------------------------------------------------------------------
80 // LEX_MIN_INT: Minimal (most negative) value for integer tokens.
81 //-----------------------------------------------------------------------------
82 
83 static const int LEX_MIN_INT = -65536;
84 // We assume that LEX_MIN_INT <= -LEX_MAX_INT (as usual).
85 // -LEX_MAX_INT must still be an int value,
86 // or there will be overflows.
87 // In addition, the code will wrongly declare an out-of-range integer
88 // if LEX_MIN_INT <= -LEX_MAX_INT does not hold.
89 
90 //-----------------------------------------------------------------------------
91 // LEX_MAGIC: Magic number (identifies objects of class lex_c for debugging).
92 //-----------------------------------------------------------------------------
93 
94 static const long LEX_MAGIC = 0x4C45580AL; // ASCII bytes 'L' 'E' 'X' '\n'
95 
96 //=============================================================================
97 // Class for Currently Scanned File:
98 //=============================================================================
99 
100 class lex_c {
101  public:
102 
103 //-----------------------------------------------------------------------------
104 // Methods:
105 //-----------------------------------------------------------------------------
106 
107  // Constructor:
108  lex_c(in_c* input);
109 
110  // Destructor:
111  ~lex_c(void);
112 
113  // tok_type: Get type of current token.
114  inline tok_t tok_type() const
115  { CHECK_VALID("lex_c::tok_type");
116  return(TokType); }
117 
118  // tok_ptr: Get pointer to start of token.
119  inline str_t tok_ptr() const
120  { CHECK_VALID("lex_c::tok_ptr");
121  return(TokPtr); }
122 
123  // tok_end: Get pointer to current character (just after token).
124  inline str_t tok_end() const
125  { CHECK_VALID("lex_c::tok_end");
126  return(TokEnd); }
127 
128  // tok_int: Integer value of current token (must be TOK_INT).
129  inline int tok_int() const
130  { CHECK_VALID("lex_c::tok_int");
131  CHECK(TokType == TOK_INT,
132  "lex_c::tok_int: Current token is no integer");
133  return(TokInt); }
134 
135  // next: Read next token.
136  void next(void);
137 
138  // filename: Return the current filename.
139  inline str_t filename() const
140  { CHECK_VALID("lex_c::filename");
141  return(Input->filename()); }
142 
143  // line_no: Return the current line number.
144  inline int line_no() const
145  { CHECK_VALID("lex_c::line_no");
146  return(LineNo); }
147 
148  // show_err: Show error for the current token in the current line.
149  void show_err(syn_t syn) const;
150 
151 //-----------------------------------------------------------------------------
152 // Debugging Support:
153 //-----------------------------------------------------------------------------
154 
155 #if VER_DEBUG
156  // Integrity check:
157  public:
158  str_t check() const;
159 
160  // Magic number (identifies objects of this class for debugging).
161  private:
162  long Magic; // Must be "LEX_MAGIC".
163 #endif
164 
165 //-----------------------------------------------------------------------------
166 // Copy-constructor and assignment operator are not supported for this class:
167 //-----------------------------------------------------------------------------
168 
169  private:
170 
171  lex_c(const lex_c& lex); // Not implemented
172  lex_c& operator=(const lex_c& lex); // Not implemented
173 
174 
175 //-----------------------------------------------------------------------------
176 // Private Class Members:
177 //-----------------------------------------------------------------------------
178 
179  private:
180 
181  // Input: The input stream for this file.
182  in_t Input;
183 
184  // Eof: Was the end of file reached?
185  bool Eof;
186 
187  // Buf: Input buffer.
188  char Buf[LEX_LINESIZE+LEX_BLOCKSIZE];
189 
190  // Bytes: Number of Bytes in Buffer.
191  int Bytes;
192 
193  // LineNo: Current line number.
194  int LineNo;
195 
196  // LinePtr: Begin of current line in buffer.
197  char *LinePtr;
198 
199  // LineEnd: End of current line in buffer (points to '\n').
200  char *LineEnd;
201 
202  // TokType: Type of current token.
203  tok_t TokType;
204 
205  // TokPtr: Begin of current token in buffer.
206  char *TokPtr;
207 
208  // TokEnd: End of current token in buffer
209  // (points to next char after token).
210  char *TokEnd;
211 
212  // TokInt: Integer value of the current token.
213  int TokInt;
214 
215 //-----------------------------------------------------------------------------
216 // Auxillary Functions:
217 //-----------------------------------------------------------------------------
218 
219  private:
220 
221  // getline: Set LinePtr and LineEnd on the next line,
222  // fill buffer if necessary:
223  void getline();
224 
225  // scan_int: Convert current token into integer.
226  bool scan_int(register str_t p, register str_t end_p, int *value);
227 
228 //=============================================================================
229 // End of Class:
230 //=============================================================================
231 
232 };
233 
234 //-----------------------------------------------------------------------------
235 // lex_t: Pointer type for lexical scanner objects (lex_c).
236 //-----------------------------------------------------------------------------
237 
238 typedef lex_c *lex_t;
239 
240 //-----------------------------------------------------------------------------
241 // LEX_NULL: Null pointer of type lex_t.
242 //-----------------------------------------------------------------------------
243 
244 #define LEX_NULL (static_cast<lex_t>(0))
245 
246 //=============================================================================
247 // End of Include File:
248 //=============================================================================
249 
250 #endif
251 
enum syn_enum syn_t
Definition: in.h:50
#define CHECK_VALID(EX)
Definition: check.h:85
const char * str_t
Definition: str.h:41
Syntax error messages.
enum tok_enum tok_t
Definition: lex.h:100
#define CHECK(EX, MSG)
Definition: check.h:69
Token types (for lexical scanner)