1 /******************************************************************************
2 *
3 * Copyright (C) 2006, The Gentee Group. All rights reserved.
4 * This file is part of the Gentee open source project - http://www.gentee.com.
5 *
6 * THIS FILE IS PROVIDED UNDER THE TERMS OF THE GENTEE LICENSE ("AGREEMENT").
7 * ANY USE, REPRODUCTION OR DISTRIBUTION OF THIS FILE CONSTITUTES RECIPIENTS
8 * ACCEPTANCE OF THE AGREEMENT.
9 *
10 * ID: lex 18.10.06 0.0.A.
11 *
12 * Author: Alexey Krivonogov ( gentee )
13 *
14 ******************************************************************************/
15
16 #include "lex.h"
17 #include "lextbl.h"
18 //! temporary
19 #include "../os/user/defines.h"
20
21 //----------------------------------------------------------------------------
22
23 uint STDCALL gentee_lexptr( pubyte in, plex pl, parr output )
24 {
25 plexitem pli, pn;
26 plexexp pexp = 0;
27 uint pos, i, posoff = 0;
28 // pubyte in = buf_ptr( input ); // Входящий буфер
29 puint pmain = ( puint )buf_ptr( &pl->tbl );
30 puint ptbl = pmain; // текущая строка разбора
31 uint cur = 0; // Номер текущего разбираемого символа
32
33 uint newstate, cmd, nameoff, len;
34 uint flag, val, keyword = 0, istext = 0;
35 lextry ltry;
36 uint sn_pos, sn_len = 0;
37 uint isnew = 0;
38 uint pair = 0;
39
40 arr_clear( &pl->state );
41 arr_clear( &pl->expr );
42 arr_clear( &pl->litems );
43 arr_clear( &pl->mitems );
44 pl->imulti = 0;
45 // arr_appendnum( &pl->state, 0 );
46 do
47 {
48 val = ptbl[ in[ cur ]]; // Получаем новое состояние
49 rettry:
50 // printf("val=%x %c cur=%i li=%i\n", val, in[cur], cur, arr_count( &pl->litems ));
51 if ( pexp && ptbl == pmain )
52 {
53 if ( in[cur] == pexp->left )
54 pexp->count++;
55 if ( in[cur] == pexp->right )
56 {
57 if ( pexp->count )
58 pexp->count--;
59 else
60 {
61 val = ( pexp->state + 1 ) << 16;
62 val |= LEXF_ITSTATE | LEXF_POS;
63 arr_pop( &pl->expr );
64 pexp = ( plexexp )arr_top( &pl->expr );
65 }
66 }
67 }
68 flag = val & 0xFFFF;
69 if ( isnew )
70 {
71 flag |= LEXF_ITSTATE | LEXF_POS;
72 // isnew = 0;
73 }
74 if ( flag & LEXF_PAIR )
75 {
76 switch ( in[ cur ] )
77 {
78 case '[' :
79 pair++;
80 flag = 0;
81 val = LEX_OK;
82 break;
83 case ']' :
84 if ( pair )
85 {
86 pair--;
87 val = LEX_OK;
88 flag = 0;
89 }
90 break;
91 }
92 }
93 if ( flag & LEXF_MULTI )
94 {
95 uint curm, count, k;
96 plexmulti pmulti;
97 pubyte pcmp;
98
99 curm = val >> 24;
100 count = (( val >> 16 ) & 0xff );
101 for ( i = 1; i <= count; i++ )
102 {
103 pmulti = ( plexmulti )arr_ptr( &pl->mitems, curm * 8 + i );
104 pcmp = ( pubyte )&pmulti->chars;
105 // printf( "CMP XXX %s ==% s len = %i\n",
106 // pcmp, in + cur, pmulti->len );
107 for ( k = 1; k < pmulti->len; k++ )
108 {
109 if ( pcmp[k] != in[ cur + k ] )
110 break;
111 }
112 if ( k == pmulti->len )
113 {
114 val = pmulti->value;
115 pos = cur;
116 cur += pmulti->len - 1;
117 if ( val & LEXF_POS )
118 posoff = pmulti->len - 1;
119 // printf( "Multi XXX %i val=%x pos = %i len = %i\n",
120 // cur, val, pos, pmulti->len );
121 goto rettry;
122 }
123 }
124 val = (( plexmulti )arr_ptr( &pl->mitems, curm * 8 ))->value;
125 // printf("Ooops 1= %x\n", val );
126 goto rettry;
127 }
128 newstate = ( val >> 16 ) & 0xFF;
129 cmd = val & 0xFF000000;
130 if ( flag & LEXF_TRY )
131 {
132 ltry.pos = cur;
133 // ltry.state = ((( pubyte )ptbl - buf_ptr( &pl->tbl )) >> 10 ) + 1;
134 ltry.ret = cmd >> 24;
135 }
136 if ( flag & LEXF_RET )
137 {
138 // printf("Ret=%i\n", cur );
139 cur = ltry.pos;//arr_pop( &pl->itry );
140 val = *( puint )( buf_ptr( &pl->tbl ) + (( ltry.ret - 1 ) << 10 ) +
141 sizeof( uint ));
142 // val &= ~LEXF_RET;
143 goto rettry;
144 }
145
146 if ( flag & LEXF_POS )
147 {
148 pos = cur - posoff;
149 posoff = 0;
150 }
151 if ( flag & LEXF_ITCMD || flag & LEXF_ITSTATE )
152 {
153 if ( istext && in[cur] == 0xa )
154 {
155 newstate = 2;
156 istext = 0;
157 }
158 if ( keyword ) // Надо проверить предыдущую лексему на keyword
159 {
160 ubyte curch = in[ pli->pos + pli->len ];
161
162 keyword = 0;
163 in[ pli->pos + pli->len ] = 0;
164 pli->value = hash_getuint( &pl->keywords, in + pli->pos );
165 // printf( ">%s< %i = %i\n", in + pli->pos, pli->len, pli->value );
166 if ( pli->value == 255 &&
167 *( puint )( in + pli->pos ) == 0x74786574 )// text for gentee
168 {
169 in[ pli->pos + pli->len ] = curch;
170 cur = pli->pos + pli->len;
171 istext = 1;
172 continue;
173 }
174 else
175 in[ pli->pos + pli->len ] = curch;
176 }
177 // Добавляем лексему
178 pli = arr_append( output );
179 pli->type = ( flag & LEXF_ITCMD ) ? cmd : ( val & 0xFF0000 );
180 pli->pos = pos;
181 if ( isnew )
182 {
183 pli->type = (((( pubyte )ptbl - buf_ptr( &pl->tbl )) >> 10 ) + 1 ) << 16 ;
184 // printf( "TYPE NEW = %i\n", pli->type );
185 isnew = 0;
186 }
187 pli->value = 0;
188 pli->len = cur - pos + 1;
189 if ( flag & LEXF_STAY )
190 pli->len--;
191 if ( flag & LEXF_VALUE )
192 {
193 for ( i = 0; i < pli->len; i++ )
194 pli->value |= in[ pos + i ] << ( i << 3 );
195 }
196 if ( flag & LEXF_KEYWORD )
197 keyword = 1;
198 // printf("LEX_ITEM %i %i state=%x val=%x\n", pli->pos, pli->len, pli->type, val & 0xFF0000 );
199 }
200 switch ( cmd )
201 {
202 case LEX_STRNAME:
203 if ( sn_len )
204 {
205 if ( mem_cmp( in + cur, in + sn_pos, sn_len ))
206 {
207 cur++;
208 continue;
209 }
210 sn_len = 0;
211 }
212 else
213 {
214 sn_pos = cur;
215 while ( in[ cur ] && in[cur] != ']' )
216 cur++;
217 sn_len = cur - sn_pos + 1;
218 // printf("STR_NAME %i %i\n", sn_pos, sn_len );
219 cur--;
220 }
221 break;
222 case LEX_EXPR:
223 pexp = ( plexexp )arr_append( &pl->expr );
224 pexp->left = in[cur];
225 pexp->right = in[cur] != '{' ? ')' : '}';
226 pexp->count = 0;
227 pexp->state = (( pubyte )ptbl - buf_ptr( &pl->tbl )) >> 10;
228 case LEX_OK:
229 pli->len = cur - pli->pos + 1;//++;
230 // printf("LEX_OK %i %i cur = %i c=%c\n", pli->pos, pli->len, cur, in[ cur ] );
231 break;
232 case LEX_GTNAME:
233 pn = ( plexitem )arr_ptr( output, arr_getlast( &pl->litems ));
234 nameoff = pn->pos + 1;
235 if ( in[ nameoff ] == '|' )
236 nameoff++;
237 if ( in[ nameoff ] == '*' )
238 nameoff++;
239 len = pn->len - ( nameoff - pn->pos );
240
241 for ( i = 0; i < len; i++ )
242 {
243 if ( in[ cur + i ] != in[ nameoff + i ])
244 break;
245 }
246 // printf("OK %i %c == %c\n", i, in[ cur + i], in[ nameoff + i ]);
247
248 if ( i == len && in[ cur + i ] == '>' )
249 {
250 cur += i;
251 continue;
252 }
253 else
254 {
255 // cur++;
256 // val = //ptbl[ 1 ];
257 cur = ltry.pos;//arr_pop( &pl->itry );
258 val = *( puint )( buf_ptr( &pl->tbl ) + (( ltry.ret - 1 ) << 10 ) +
259 sizeof( uint ));
260 // printf( "GTNAME=%i %x\n", cur, val );
261 goto rettry;
262 }
263 break;
264 }
265 if ( flag & LEXF_PUSHLI )
266 {
267 arr_appendnum( &pl->litems, arr_count( output ) - 1 );
268 // printf("PUSHLI now = %i\n", arr_count( &pl->litems ));
269 }
270 if ( flag & LEXF_PUSH )
271 {
272 // arr_appendnum( &pl->litems, arr_count( output ) - 1 );
273 i = (( pubyte )ptbl - buf_ptr( &pl->tbl )) >> 10;
274 arr_appendnum( &pl->state, i + 1 );
275 // printf("PUSH newstate = %x now = %i first=%i\n", i + 1,
276 // arr_count( &pl->state ), *( puint )arr_ptr( &pl->state, 0 ));
277 // if ( flag & LEXF_NAME )
278 // arr_appendnum( &pl->litems, arr_count( output ) - 1 );
279 }
280 if ( flag & LEXF_POPLI )
281 {
282 arr_pop( &pl->litems );
283 // printf("POPLI now = %i\n", arr_count( &pl->litems ));
284 }
285 if ( flag & LEXF_POP )// && !newstate )
286 {
287 uint l = arr_pop( &pl->state );
288 if ( !newstate )
289 newstate = l;
290 if ( flag & LEXF_NEW )
291 {
292 isnew = 1;
293 // printf("NEW\n");
294 // printf("POP newstate = %x remain = %i first=%i\n", newstate,
295 // arr_count( &pl->state ), *( puint )arr_ptr( &pl->state, 0 ));
296 }
297 // printf("POP newstate = %x remain = %i first=%i\n", newstate,
298 // arr_count( &pl->state ), *( puint )arr_ptr( &pl->state, 0 ));
299 }
300 if ( newstate )
301 ptbl = ( puint )( buf_ptr( &pl->tbl ) + (( newstate - 1 ) << 10 ));
302 if ( !( flag & LEXF_STAY ))
303 cur++;
304
305 } while ( cmd != LEX_STOP );
306
307 return 1;
308 }
309
310 //----------------------------------------------------------------------------
311
312 uint STDCALL gentee_lex( pbuf input, plex pl, parr output )
313 {
314 return gentee_lexptr( buf_ptr( input ), pl, output );
315 }
316
317 //----------------------------------------------------------------------------
318
319 plex STDCALL lex_init( plex pl, puint ptbl )
320 {
321 if ( !pl )
322 {
323 pl = ( plex )mem_alloc( sizeof( lex ));
324 pl->alloced = 1;
325 }
326 else
327 pl->alloced = 0;
328
329 buf_init( &pl->tbl );
330 arr_init( &pl->state, sizeof( uint ));
331 arr_init( &pl->litems, sizeof( uint ));
332 arr_init( &pl->mitems, sizeof( lexmulti ));
333 arr_appenditems( &pl->mitems, 64 * 8 );
334 pl->imulti = 0;
335 hash_init( &pl->keywords, sizeof( uint ));
336 arr_init( &pl->expr, sizeof( lexexp ));
337
338 if ( ptbl )
339 lex_tbl( pl, ptbl );
340 return pl;
341 }
342
343 //----------------------------------------------------------------------------
344
345 void STDCALL lex_delete( plex pl )
346 {
347 buf_delete( &pl->tbl );
348 arr_delete( &pl->state );
349 arr_delete( &pl->litems );
350 arr_delete( &pl->mitems );
351 arr_delete( &pl->expr );
352 hash_delete( &pl->keywords );
353 if ( pl->alloced )
354 mem_free( pl );
355 }
356