1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * Lexical analysis for genksyms.
4  * Copyright 1996, 1997 Linux International.
5  *
6  * New implementation contributed by Richard Henderson <rth@tamu.edu>
7  * Based on original work by Bjorn Ekwall <bj0rn@blox.se>
8  *
9  * Taken from Linux modutils 2.4.22.
10  */
11 
12 %{
13 
14 #include <limits.h>
15 #include <stdbool.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <ctype.h>
19 
20 #include "genksyms.h"
21 #include "parse.tab.h"
22 
23 /* We've got a two-level lexer here.  We let flex do basic tokenization
24    and then we categorize those basic tokens in the second stage.  */
25 #define YY_DECL		static int yylex1(void)
26 
27 %}
28 
/* Identifiers: a letter, '_' or '$', followed by letters, digits, '_'
   or '$'.  */
IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

/* Integer constants: octal (including a lone 0), decimal or hexadecimal,
   optionally followed by one suffix out of U, L, UL or LU (either case).  */
O_INT			0[0-7]*
D_INT			[1-9][0-9]*
X_INT			0[Xx][0-9A-Fa-f]+
I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

/* Floating constants: either a fraction (digits around a '.', at least one
   digit on one side) with an optional exponent, or plain digits with a
   mandatory exponent; an F or L suffix may follow in both forms.  */
FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
EXP			[Ee][+-]?[0-9]+
F_SUF			[FfLl]
REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

/* String and character literals with an optional L prefix.  A backslash
   always consumes the character after it, so an escaped quote does not
   end the literal.  */
STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

/* Two-character operators: one of ~ % ^ & * + = | < > / - followed by '=',
   or && || -> << >>.
   NOTE(review): the character class contains '~' but not '!', so "!=" is
   not matched as one token while "~=" is -- confirm whether this is
   intentional before changing it, since it affects the token text that
   is recorded.  */
MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
46 
47 /* We don't do multiple input files.  */
48 %option noyywrap
49 
50 %option noinput
51 
52 %%
53 
 /* Names of the form [u]int<bits>x<count>_t (for example uint32x4_t) are
    returned as BUILTIN_INT_KEYW.  The rule is listed ahead of {IDENT},
    which would otherwise match the same text.  */
u?int(8|16|32|64)x(1|2|4|8|16)_t	return BUILTIN_INT_KEYW;

 /* Keep track of our location in the original source files.  A line of
    the form `# <integer> "<file>" ...' is handed to yylex() as FILENAME,
    which takes the file name and line number out of yytext.  Any other
    line starting with '#' and every plain newline just advance
    cur_line.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
^#.*\n					cur_line++;
\n					cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+				;


 /* Literals and identifiers.  Keywords are not recognized here; yylex()
    classifies IDENT tokens afterwards.  */
{STRING}				return STRING;
{CHAR}					return CHAR;
{IDENT}					return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}				return OTHER;
{INT}					return INT;
{REAL}					return REAL;

 /* The ellipsis gets a token of its own.  */
"..."					return DOTS;

 /* All other tokens are single characters and are returned as their own
    character code.  */
.					return yytext[0];
81 
82 
83 %%
84 
85 /* Bring in the keyword recognizer.  */
86 
87 #include "keywords.c"
88 
89 
90 /* Macros to append to our phrase collection list.  */
91 
/*
 * We mark any token that equals a known enumerator as SYM_ENUM_CONST.
 * The parser will change this for struct and union tags later; the only
 * problem is struct and union members:
 *    enum e { a, b }; struct s { int a, b; }
 * In this case the only effect is that the ABI checksums become more
 * volatile, which is acceptable. Such collisions are also quite rare;
 * so far one has only been observed in include/linux/telephony.h.
 */
/*
 * _APP(T, L): record the token text T (L characters long) in the phrase
 * list.  L + 1 bytes are copied, so T must be readable up to and including
 * its terminating NUL.
 *
 * The macro expects `cur_node', `next_node' and `in_source_file' to be in
 * scope.  `next_node' is an empty placeholder: it becomes the node that is
 * filled in (left in `cur_node'), and a fresh placeholder is allocated whose
 * `next' points back at it.
 *
 * Both arguments are parenthesized and L is evaluated only once, so
 * expressions such as `c ? a : b' are safe to pass.
 */
#define _APP(T,L)	do {						   \
			  size_t app_len_ = (size_t)(L) + 1;		   \
			  cur_node = next_node;				   \
			  next_node = xmalloc(sizeof(*next_node));	   \
			  next_node->next = cur_node;			   \
			  cur_node->string =				   \
			    memcpy(xmalloc(app_len_), (T), app_len_);	   \
			  cur_node->tag =				   \
			    find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\
			    SYM_ENUM_CONST : SYM_NORMAL ;		   \
			  cur_node->in_source_file = in_source_file;       \
			} while (0)

/* Append the text of the token flex has just matched.  */
#define APP		_APP(yytext, yyleng)
113 
114 
115 /* The second stage lexer.  Here we incorporate knowledge of the state
116    of the parser to tailor the tokens that are returned.  */
117 
118 /*
119  * The lexer cannot distinguish whether a typedef'ed string is a TYPE or an
120  * IDENT. We need a hint from the parser to handle this accurately.
121  */
122 bool dont_want_type_specifier;
123 
124 int
125 yylex(void)
126 {
127   static enum {
128     ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
129     ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT,
130   } lexstate = ST_NOTSTARTED;
131 
132   static int suppress_type_lookup, dont_want_brace_phrase;
133   static struct string_list *next_node;
134   static char *source_file;
135 
136   int token, count = 0;
137   struct string_list *cur_node;
138 
139   if (lexstate == ST_NOTSTARTED)
140     {
141       next_node = xmalloc(sizeof(*next_node));
142       next_node->next = NULL;
143       lexstate = ST_NORMAL;
144     }
145 
146 repeat:
147   token = yylex1();
148 
149   if (token == 0)
150     return 0;
151   else if (token == FILENAME)
152     {
153       char *file, *e;
154 
155       /* Save the filename and line number for later error messages.  */
156 
157       if (cur_filename)
158 	free(cur_filename);
159 
160       file = strchr(yytext, '\"')+1;
161       e = strchr(file, '\"');
162       *e = '\0';
163       cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
164       cur_line = atoi(yytext+2);
165 
166       if (!source_file) {
167         source_file = xstrdup(cur_filename);
168         in_source_file = 1;
169       } else {
170         in_source_file = (strcmp(cur_filename, source_file) == 0);
171       }
172 
173       goto repeat;
174     }
175 
176   switch (lexstate)
177     {
178     case ST_NORMAL:
179       APP;
180       switch (token)
181 	{
182 	case IDENT:
183 	  {
184 	    int r = is_reserved_word(yytext, yyleng);
185 	    if (r >= 0)
186 	      {
187 		switch (token = r)
188 		  {
189 		  case ATTRIBUTE_KEYW:
190 		    lexstate = ST_ATTRIBUTE;
191 		    count = 0;
192 		    goto repeat;
193 		  case ASM_KEYW:
194 		    lexstate = ST_ASM;
195 		    count = 0;
196 		    goto repeat;
197 		  case TYPEOF_KEYW:
198 		    lexstate = ST_TYPEOF;
199 		    count = 0;
200 		    goto repeat;
201 
202 		  case STRUCT_KEYW:
203 		  case UNION_KEYW:
204 		  case ENUM_KEYW:
205 		    dont_want_brace_phrase = 3;
206 		    suppress_type_lookup = 2;
207 		    goto fini;
208 
209 		  case EXPORT_SYMBOL_KEYW:
210 		      goto fini;
211 
212 		  case STATIC_ASSERT_KEYW:
213 		    lexstate = ST_STATIC_ASSERT;
214 		    count = 0;
215 		    goto repeat;
216 		  }
217 	      }
218 	    if (!suppress_type_lookup && !dont_want_type_specifier)
219 	      {
220 		if (find_symbol(yytext, SYM_TYPEDEF, 1))
221 		  token = TYPE;
222 	      }
223 	  }
224 	  break;
225 
226 	case '[':
227 	  lexstate = ST_BRACKET;
228 	  count = 1;
229 	  goto repeat;
230 
231 	case '{':
232 	  if (dont_want_brace_phrase)
233 	    break;
234 	  lexstate = ST_BRACE;
235 	  count = 1;
236 	  goto repeat;
237 
238 	case '=': case ':':
239 	  lexstate = ST_EXPRESSION;
240 	  break;
241 
242 	default:
243 	  break;
244 	}
245       break;
246 
247     case ST_ATTRIBUTE:
248       APP;
249       switch (token)
250 	{
251 	case '(':
252 	  ++count;
253 	  goto repeat;
254 	case ')':
255 	  if (--count == 0)
256 	    {
257 	      lexstate = ST_NORMAL;
258 	      token = ATTRIBUTE_PHRASE;
259 	      break;
260 	    }
261 	  goto repeat;
262 	default:
263 	  goto repeat;
264 	}
265       break;
266 
267     case ST_ASM:
268       APP;
269       switch (token)
270 	{
271 	case '(':
272 	  ++count;
273 	  goto repeat;
274 	case ')':
275 	  if (--count == 0)
276 	    {
277 	      lexstate = ST_NORMAL;
278 	      token = ASM_PHRASE;
279 	      break;
280 	    }
281 	  goto repeat;
282 	default:
283 	  goto repeat;
284 	}
285       break;
286 
287     case ST_TYPEOF_1:
288       if (token == IDENT)
289 	{
290 	  if (is_reserved_word(yytext, yyleng) >= 0
291 	      || find_symbol(yytext, SYM_TYPEDEF, 1))
292 	    {
293 	      yyless(0);
294 	      unput('(');
295 	      lexstate = ST_NORMAL;
296 	      token = TYPEOF_KEYW;
297 	      break;
298 	    }
299 	  _APP("(", 1);
300 	}
301 	lexstate = ST_TYPEOF;
302 	/* FALLTHRU */
303 
304     case ST_TYPEOF:
305       switch (token)
306 	{
307 	case '(':
308 	  if ( ++count == 1 )
309 	    lexstate = ST_TYPEOF_1;
310 	  else
311 	    APP;
312 	  goto repeat;
313 	case ')':
314 	  APP;
315 	  if (--count == 0)
316 	    {
317 	      lexstate = ST_NORMAL;
318 	      token = TYPEOF_PHRASE;
319 	      break;
320 	    }
321 	  goto repeat;
322 	default:
323 	  APP;
324 	  goto repeat;
325 	}
326       break;
327 
328     case ST_BRACKET:
329       APP;
330       switch (token)
331 	{
332 	case '[':
333 	  ++count;
334 	  goto repeat;
335 	case ']':
336 	  if (--count == 0)
337 	    {
338 	      lexstate = ST_NORMAL;
339 	      token = BRACKET_PHRASE;
340 	      break;
341 	    }
342 	  goto repeat;
343 	default:
344 	  goto repeat;
345 	}
346       break;
347 
348     case ST_BRACE:
349       APP;
350       switch (token)
351 	{
352 	case '{':
353 	  ++count;
354 	  goto repeat;
355 	case '}':
356 	  if (--count == 0)
357 	    {
358 	      lexstate = ST_NORMAL;
359 	      token = BRACE_PHRASE;
360 	      break;
361 	    }
362 	  goto repeat;
363 	default:
364 	  goto repeat;
365 	}
366       break;
367 
368     case ST_EXPRESSION:
369       switch (token)
370 	{
371 	case '(': case '[': case '{':
372 	  ++count;
373 	  APP;
374 	  goto repeat;
375 	case '}':
376 	  /* is this the last line of an enum declaration? */
377 	  if (count == 0)
378 	    {
379 	      /* Put back the token we just read so's we can find it again
380 		 after registering the expression.  */
381 	      unput(token);
382 
383 	      lexstate = ST_NORMAL;
384 	      token = EXPRESSION_PHRASE;
385 	      break;
386 	    }
387 	  /* FALLTHRU */
388 	case ')': case ']':
389 	  --count;
390 	  APP;
391 	  goto repeat;
392 	case ',': case ';':
393 	  if (count == 0)
394 	    {
395 	      /* Put back the token we just read so's we can find it again
396 		 after registering the expression.  */
397 	      unput(token);
398 
399 	      lexstate = ST_NORMAL;
400 	      token = EXPRESSION_PHRASE;
401 	      break;
402 	    }
403 	  APP;
404 	  goto repeat;
405 	default:
406 	  APP;
407 	  goto repeat;
408 	}
409       break;
410 
411     case ST_STATIC_ASSERT:
412       APP;
413       switch (token)
414 	{
415 	case '(':
416 	  ++count;
417 	  goto repeat;
418 	case ')':
419 	  if (--count == 0)
420 	    {
421 	      lexstate = ST_NORMAL;
422 	      token = STATIC_ASSERT_PHRASE;
423 	      break;
424 	    }
425 	  goto repeat;
426 	default:
427 	  goto repeat;
428 	}
429       break;
430 
431     default:
432       exit(1);
433     }
434 fini:
435 
436   if (suppress_type_lookup > 0)
437     --suppress_type_lookup;
438 
439   /*
440    *  __attribute__() can be placed immediately after the 'struct' keyword.
441    *  e.g.) struct __attribute__((__packed__)) foo { ... };
442    */
443   if (token != ATTRIBUTE_PHRASE && dont_want_brace_phrase > 0)
444     --dont_want_brace_phrase;
445 
446   yylval = &next_node->next;
447 
448   return token;
449 }
450