| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338 |
10x
10x
105x
101x
101x
101x
101x
101x
101x
101x
101x
101x
248x
248x
248x
117x
140x
117x
9x
108x
91x
17x
8x
7x
7x
7x
1x
123x
123x
123x
123x
5x
5x
118x
4x
4x
114x
5x
5x
114x
1x
113x
1x
112x
112x
112x
101x
296x
5x
5x
16x
16x
11x
5x
5x
101x
289x
8x
8x
7x
8x
101x
224x
65x
24x
65x
65x
101x
59x
59x
14x
14x
101x
202x
250x
21x
17x
1x
16x
17x
4x
4x
101x
381x
226x
12x
12x
9x
9x
5x
5x
200x
3x
3x
69x
8x
8x
275x
101x
101x
101x
101x
1138x
100x
1038x
282x
1x
281x
281x
756x
101x
100x
101x
1138x
1138x
10x
65x
5x
1058x
967x
91x
22x
69x
10x
| "use strict";
const QueryParser = require('./QueryParser');
const decoder = require('unidecode');
/**
 * Parses a free-text search string into the filter structure produced by
 * `QueryParser#getDefault()`.
 *
 * Parsing is bottom-up. The string is lexed into `[text, class]` tokens and a
 * series of in-place passes then joins neighbouring tokens:
 *
 *   - exact-match phrases (text between quote/bracket tokens),
 *   - negations (`not` / `-`),
 *   - field prefixes (`field:value` and `field:filter:value`),
 *   - runs of plain words,
 *   - `and` / `or` groups.
 *
 * After the passes a token is `[value, class, field?, filter?]`, where `value`
 * is a string for leaf tokens and a nested token (or token list) for `not`,
 * `and` and `or`.
 */
class StringQueryParser extends QueryParser
{
    /**
     * @param {Object} [config] - Forwarded unchanged to `QueryParser`.
     */
    constructor(config = {})
    {
        super(config);
    }

    /**
     * Parse a query string.
     *
     * @param {*} query - Query text. `null`/`undefined` are treated as an empty
     *     query; any other value is converted with `String()`.
     * @param {number} [maxTokns=15] - Maximum number of leaf filters to emit.
     *     The input is also truncated to `maxTokns * 10` characters.
     * @returns {Object} The object returned by `getDefault()` with `filter`
     *     (and possibly `sort` / `order`) filled in. `filter` is `[]` when the
     *     query yields no filters.
     */
    parse(query, maxTokns = 15)
    {
        const text = query == null ? '' : String(query);
        // `substring` clamps negative/NaN lengths to 0, like `substr(0, n)` did.
        const tokens = Array.from(this.tokenise(text.substring(0, maxTokns * 10)));

        // Bottom-up parsing: each pass rewrites `tokens` in place.
        this.parseJoinAllExactMatchTokens(tokens);
        this.parseJoinAllNotTokens(tokens);
        this.parseJoinAllFieldTokens(tokens);
        this.parseJoinAllRegularTokens(tokens);
        this.parseJoinAllAndOrTokens(tokens);

        const fresh = this.getDefault();
        // Top-level tokens are implicitly and-ed together.
        fresh.filter = this.treeToFilters(fresh, { maxTokns }, [tokens, 'and']) || [];
        return fresh;
    }

    /**
     * Recursively convert a parsed token (tree) into a filter description.
     *
     * @param {Object} fresh - Result object; `sort` / `order` are written to it
     *     when the pseudo-fields `sort:` / `order:` are encountered.
     * @param {Object} config - Shared state: `maxTokns` (limit) and
     *     `tokenIndex` (number of leaf filters emitted so far).
     * @param {Array} tree - Token `[value, class, field?, filter?]`.
     * @returns {Object|undefined} A filter object, or `undefined` when the
     *     token produces no filter.
     */
    treeToFilters(fresh, config, tree)
    {
        config.tokenIndex = config.tokenIndex || 0;

        // Field names come from user input, so `constructor:x` must not
        // resolve to an Object.prototype member: only accept entries that
        // actually carry a `filters` list.
        const definitionOf = name =>
        {
            const definition = this.fields[name];
            return definition && Array.isArray(definition.filters) ? definition : undefined;
        };

        let [values, filter] = tree;
        let field;

        switch (filter)
        {
            case 'and':
            case 'or':
            {
                // A bare operator that was swallowed as an operand
                // ("a and and b") still carries its text, not a child list.
                if (!Array.isArray(values))
                {
                    return undefined;
                }
                values = values
                    .map(child => this.treeToFilters(fresh, config, child))
                    .filter(x => x);
                if (values.length === 0)
                {
                    return undefined;
                }
                if (values.length === 1)
                {
                    // A group with a single member collapses to that member.
                    return values[0];
                }
                return {
                    filter,
                    values
                };
            }
            case 'not':
            {
                // `values` is `false` when nothing followed the negation and a
                // string when the operand was itself a bare `not` token.
                if (!Array.isArray(values))
                {
                    return undefined;
                }
                // Hand a field/filter attached to the negation ("field:-x")
                // down to the negated token.
                values.splice(2, 0, ...tree.slice(2));
                values = this.treeToFilters(fresh, config, values);
                if (!values)
                {
                    // Nothing left to negate.
                    return undefined;
                }
                return {
                    filter,
                    values
                };
            }
            case 'a':
            case 'exact':
            {
                const requested = definitionOf(tree[2]);
                // Explicit filter, else the field's first filter, else the
                // default for this token class.
                filter = tree[3]
                    || (tree[2] && requested && requested.filters[0])
                    || (filter === 'exact' ? this.defaultExactFilter : this.defaultFilter);
                field = tree[2] || this.defaultField;
                values = values.split(' ');

                if (field === 'sort')
                {
                    // Only sort keys listed in `this.sort` are accepted.
                    fresh.sort = this.sort.indexOf(values[0]) === -1 ? fresh.sort : values[0];
                    return undefined;
                }
                if (field === 'order')
                {
                    fresh.order = values[0].toLowerCase() === 'asc' ? 'asc' : 'dsc';
                    return undefined;
                }

                // "filter:value" shorthand: an unknown field that names a
                // filter of the default field selects that filter.
                const fallback = definitionOf(this.defaultField);
                if (!definitionOf(field) && fallback && fallback.filters.indexOf(field) !== -1)
                {
                    filter = field;
                    field = this.defaultField;
                }

                const definition = definitionOf(field);
                if (!definition || definition.filters.indexOf(filter) === -1)
                {
                    return undefined;
                }
                // `>=` so that at most `maxTokns` leaf filters are emitted.
                if (config.tokenIndex >= config.maxTokns)
                {
                    return undefined;
                }
                config.tokenIndex++;
                return {
                    filter,
                    field,
                    values,
                };
            }
            default:
                // Reached by tokens no pass consumed (e.g. a stray ':').
                console.log('UNHANDELLED', filter, values);
                return undefined;
        }
    }

    /**
     * Collapse every quote/bracket token and the tokens that follow it, up to
     * and including the next quote/bracket token (or the end of input), into
     * one `'exact'` token holding the space-joined text.
     *
     * @param {Array} tokens - Token list, modified in place.
     */
    parseJoinAllExactMatchTokens(tokens)
    {
        for (let i = 0; i < tokens.length; i++)
        {
            if (tokens[i][1] !== '"')
            {
                continue;
            }
            tokens[i] = ['', 'exact'];
            while (i + 1 < tokens.length)
            {
                const next = tokens.splice(i + 1, 1)[0];
                if (next[1] === '"')
                {
                    // The closing delimiter is consumed and ends the phrase.
                    break;
                }
                tokens[i][0] += ' ' + next[0];
            }
            tokens[i][0] = tokens[i][0].trim();
        }
    }

    /**
     * Attach to every `'not'` token the token that follows it. The operand is
     * removed from the list and stored as the `not` token's value; a trailing
     * `not` gets the value `false`.
     *
     * @param {Array} tokens - Token list, modified in place.
     */
    parseJoinAllNotTokens(tokens)
    {
        for (let i = 0; i < tokens.length; i++)
        {
            if (tokens[i][1] === 'not')
            {
                tokens[i][0] = i + 1 < tokens.length ? tokens.splice(i + 1, 1)[0] : false;
            }
        }
    }

    /**
     * Resolve `name:value` sequences: the value token gets the name appended
     * (index 2) and the name and ':' tokens are removed. If the name token
     * already carries a field (`field:filter:value`), that field is appended
     * first, so the value ends up as `[value, class, field, filter]`.
     *
     * @param {Array} tokens - Token list, modified in place.
     */
    parseJoinAllFieldTokens(tokens)
    {
        for (let i = 0; i < tokens.length; i++)
        {
            if (tokens[i][1] === ':' && i > 0 && i + 1 < tokens.length)
            {
                const name = tokens[i - 1];
                const target = tokens[i + 1];
                if (name.length > 2)
                {
                    target.push(name[2]);
                }
                target.push(name[0]);
                // Drop the name and the ':'; the target moves to index i - 1.
                tokens.splice(--i, 2);
            }
        }
    }

    /**
     * Merge adjacent plain word tokens (class `'a'`, no field attached) into
     * one token whose text is the words joined by single spaces.
     *
     * @param {Array} tokens - Token list, modified in place.
     */
    parseJoinAllRegularTokens(tokens)
    {
        for (let i = 1; i < tokens.length; i++)
        {
            const previous = tokens[i - 1];
            const current = tokens[i];
            const bothPlainWords = previous[1] === 'a' && current[1] === 'a';
            if (bothPlainWords && previous.length === 2 && current.length === 2)
            {
                previous[0] = previous[0] + ' ' + current[0];
                // Re-examine the merged token against its new neighbour.
                tokens.splice(i--, 1);
            }
        }
    }

    /**
     * Fold `left OP right` into group tokens `[[left, right, ...], OP]`. All
     * `and` operators are folded before any `or` operator. An operator with no
     * operand on one side (start or end of the list) is dropped.
     *
     * @param {Array} tokens - Token list, modified in place.
     */
    parseJoinAllAndOrTokens(tokens)
    {
        for (const operator of ['and', 'or'])
        {
            for (let i = 0; i < tokens.length; i++)
            {
                if (tokens[i][1] !== operator)
                {
                    continue;
                }
                if (i > 0 && i + 1 < tokens.length)
                {
                    const left = tokens[i - 1];
                    if (left[1] === operator && Array.isArray(left[0]))
                    {
                        // Extend the group created by the previous operator.
                        left[0].push(tokens[i + 1]);
                    }
                    else
                    {
                        tokens[i - 1] = [
                            [left, tokens[i + 1]], operator
                        ];
                    }
                    // Remove the operator and its (now grouped) right operand.
                    tokens.splice(i--, 2);
                }
                else if (typeof tokens[i][0] === 'string')
                {
                    // Dangling operator; groups (array value) are left alone.
                    tokens.splice(i--, 1);
                }
            }
        }
    }

    /**
     * Turn lexemes into parser tokens: the words `and` / `or` / `not`
     * (case-insensitive) become operator tokens, `-` becomes `not`, numbers
     * are treated as words, and whitespace is dropped.
     *
     * @param {string} string - Text to tokenise.
     * @yields {Array} `[text, class]` pairs.
     */
    * tokenise(string)
    {
        for (const [text, class_] of this.lemmatise(string))
        {
            if (class_ === ' ')
            {
                continue;
            }
            if (class_ === '-')
            {
                yield [text, 'not'];
                continue;
            }
            if (class_ === '0')
            {
                yield [text, 'a'];
                continue;
            }
            if (class_ === 'a')
            {
                const keyword = text.toLowerCase();
                if (keyword === 'and' || keyword === 'or' || keyword === 'not')
                {
                    yield [text, keyword];
                    continue;
                }
            }
            yield [text, class_];
        }
    }

    /**
     * Group classified characters into lexemes. Consecutive characters of the
     * same class are joined; in addition digits may follow letters, an `e` or
     * `-` may follow digits, and a leading `-` directly followed by a digit
     * becomes part of the number.
     *
     * @param {string} string - Text to split.
     * @yields {Array} `[text, class]` pairs.
     */
    * lemmatise(string)
    {
        let last = '';
        let lastClass = false;
        for (const [char, class_] of this.lex(string))
        {
            if (!lastClass)
            {
                [last, lastClass] = [char, class_];
                continue;
            }
            const continuesLexeme = lastClass === class_
                || (lastClass === 'a' && class_ === '0')
                || (lastClass === '0' && char === 'e')
                || (lastClass === '0' && char === '-');
            if (continuesLexeme)
            {
                last += char;
            }
            else if (last === '-' && class_ === '0')
            {
                // "-5": the minus sign belongs to the number.
                [last, lastClass] = [last + char, class_];
            }
            else
            {
                yield [last, lastClass];
                [last, lastClass] = [char, class_];
            }
        }
        if (last !== '')
        {
            yield [last, lastClass];
        }
    }

    /**
     * Pass the input through `decoder`, collapse whitespace runs to single
     * spaces, trim, and classify every character.
     *
     * @param {*} string - Input; converted to a string by concatenation.
     * @yields {Array} `[char, class]` pairs.
     */
    * lex(string)
    {
        const normalised = decoder(string + '')
            .replace(/\s+/g, ' ')
            .trim();
        for (const char of normalised)
        {
            yield [char, this.classifyChar(char)];
        }
    }

    /**
     * Classify a single character.
     *
     * @param {string} char - One character.
     * @returns {string} `'"'` for quotes and brackets, `':'`, `'-'`, `'a'` for
     *     ASCII letters, `'0'` for ASCII digits and `.`, and `' '` for
     *     everything else.
     */
    classifyChar(char)
    {
        switch (char)
        {
            // phrase delimiters: quotes and brackets
            case '"':
            case "'":
            case "(":
            case ")":
            case "[":
            case "]":
                return '"';
            case ':':
                return ':';
            case '-':
                return '-';
            default:
                if ((char >= 'A' && char <= 'Z') || (char >= 'a' && char <= 'z'))
                {
                    return 'a';
                }
                if ((char >= '0' && char <= '9') || char === '.')
                {
                    return '0';
                }
                // anything else separates tokens like whitespace
                return ' ';
        }
    }
}
module.exports = StringQueryParser;
|