1 | |
package nl.beesting.beangenerator.generator.re; |
2 | |
|
3 | |
import java.util.HashMap; |
4 | |
import java.util.HashSet; |
5 | |
import java.util.Map; |
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | 70 | public class REParser { |
13 | |
|
14 | |
private HashMap<Integer, ReverseRExpression> expressions; |
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
public Map<Integer, ReverseRExpression> parseRE(String regExpression) throws ReverseREParseException { |
20 | 70 | if (regExpression == null) { |
21 | 0 | throw new IllegalArgumentException(getClass().getName() + ".parseRE(String regExpression)" |
22 | |
+ " regExpression is now allowed to null."); |
23 | |
} |
24 | 70 | expressions = new HashMap<Integer, ReverseRExpression>(); |
25 | 70 | parseExpressions(regExpression.trim().toCharArray()); |
26 | 70 | return expressions; |
27 | |
} |
28 | |
|
29 | |
|
30 | |
|
31 | |
|
32 | |
|
33 | |
|
34 | |
private void parseExpressions(char[] mask) throws ReverseREParseException { |
35 | 70 | int cl = 0; |
36 | 70 | int start = 1; |
37 | 400 | while (cl < mask.length) { |
38 | 330 | System.out.println("next loop (cl=" + cl + ", start=" + start + ")"); |
39 | 330 | int newcl = parseSubexpression(start, mask, cl); |
40 | 330 | if (newcl <= cl) { |
41 | 0 | System.err.println("Parsing went into a loop. New index (" + newcl + ") is less than, or equals old index (" |
42 | |
+ cl + ")"); |
43 | 0 | break; |
44 | |
} |
45 | 330 | cl = newcl; |
46 | 330 | start++; |
47 | 330 | } |
48 | 70 | } |
49 | |
|
50 | |
|
51 | |
|
52 | |
|
53 | |
|
54 | |
|
55 | |
|
56 | |
|
57 | |
|
58 | |
|
59 | |
private int parseSubexpression(int nr, char[] mask, int startIndex) throws ReverseREParseException { |
60 | 330 | int r = startIndex; |
61 | 330 | switch (mask[startIndex]) { |
62 | |
case '[': |
63 | 60 | r = createRangeExpresssion(nr, mask, startIndex + 1); |
64 | 60 | break; |
65 | |
case '{': |
66 | 50 | r = parseLengthExpresssion(nr, mask, startIndex + 1); |
67 | 50 | break; |
68 | |
case '.': |
69 | 0 | r = parseAnyExpression(nr, mask, startIndex); |
70 | 0 | break; |
71 | |
case '?': |
72 | 10 | r = parseGreedyExpression(nr, mask, startIndex); |
73 | 10 | break; |
74 | |
case '+': |
75 | 10 | r = parseGreedyExpression(nr, mask, startIndex); |
76 | 10 | break; |
77 | |
case '*': |
78 | 0 | r = parseGreedyExpression(nr, mask, startIndex); |
79 | 0 | break; |
80 | |
case '|': |
81 | 50 | r = parseOrExpression(nr, mask, startIndex); |
82 | 50 | break; |
83 | |
case '(': |
84 | 10 | r = parseGroupExpression(nr, mask, startIndex, true); |
85 | 10 | break; |
86 | |
case ')': |
87 | 10 | r = parseGroupExpression(nr, mask, startIndex, false); |
88 | 10 | break; |
89 | |
default: |
90 | 130 | r = parseCharExpression(nr, mask, startIndex); |
91 | |
} |
92 | 330 | return r; |
93 | |
} |
94 | |
|
95 | |
private int parseGroupExpression(int nr, char[] mask, int startIndex, boolean start) { |
96 | 20 | ReverseRExpression expression = new ReverseGroupExpression(start ? ReverseRExpression.GROUP_START:ReverseRExpression.GROUP_END); |
97 | 20 | this.expressions.put(new Integer(nr), expression); |
98 | 20 | return startIndex + 1; |
99 | |
} |
100 | |
|
101 | |
private int parseOrExpression(int nr, char[] mask, int startIndex) { |
102 | 50 | ReverseRExpression expression = new ReverseOrExpression(ReverseRExpression.OR); |
103 | 50 | this.expressions.put(new Integer(nr), expression); |
104 | 50 | return startIndex + 1; |
105 | |
} |
106 | |
|
107 | |
private int parseCharExpression(int nr, char[] mask, int startIndex) { |
108 | 130 | ReverseRExpression expression = new ReverseRExpression(ReverseRangeRExpression.CHAR); |
109 | 130 | expression.generationInstruction = new Character(mask[startIndex]); |
110 | 130 | this.expressions.put(new Integer(nr), expression); |
111 | 130 | return startIndex + 1; |
112 | |
} |
113 | |
|
114 | |
|
115 | |
|
116 | |
|
117 | |
|
118 | |
|
119 | |
|
120 | |
|
121 | |
|
122 | |
private int parseAnyExpression(int nr, char[] mask, int startIndex) { |
123 | 0 | this.expressions.put(nr, ReverseRExpression.ANY_EXPR); |
124 | 0 | return startIndex + 1; |
125 | |
} |
126 | |
|
127 | |
|
128 | |
|
129 | |
|
130 | |
|
131 | |
|
132 | |
|
133 | |
|
134 | |
|
135 | |
private int parseGreedyExpression(int nr, char[] mask, int startIndex) { |
136 | 20 | ReverseRExpression expression = new ReverseGreedyExpression(mask[startIndex]); |
137 | 20 | this.expressions.put(nr, expression); |
138 | 20 | return startIndex + 1; |
139 | |
} |
140 | |
|
141 | |
|
142 | |
|
143 | |
|
144 | |
|
145 | |
|
146 | |
|
147 | |
|
148 | |
|
149 | |
|
150 | |
private int parseLengthExpresssion(int nr, char[] mask, int startIndex) throws ReverseREParseException { |
151 | 50 | int currentIndex = startIndex; |
152 | 50 | StringBuffer startLengthString = new StringBuffer(); |
153 | 50 | StringBuffer endLengthString = new StringBuffer(); |
154 | 50 | boolean startMode = true; |
155 | 150 | while (mask[currentIndex] != '}' && currentIndex < mask.length) { |
156 | 100 | if (Character.isDigit(mask[currentIndex])) { |
157 | 80 | if (startMode) { |
158 | 60 | startLengthString.append(mask[currentIndex]); |
159 | 60 | currentIndex++; |
160 | |
} else { |
161 | 20 | endLengthString.append(mask[currentIndex]); |
162 | 20 | currentIndex++; |
163 | |
} |
164 | 20 | } else if (mask[currentIndex] == ',') { |
165 | 20 | if (startMode) { |
166 | |
|
167 | 20 | currentIndex++; |
168 | |
|
169 | 20 | startMode = false; |
170 | |
} else { |
171 | 0 | throw new ReverseREParseException("found second separator char " + mask[currentIndex], mask.toString(), |
172 | |
startIndex, currentIndex, ReverseRExpression.LENGTH); |
173 | |
} |
174 | |
} else { |
175 | 0 | throw new ReverseREParseException("found nondigit char: " + mask[currentIndex], mask.toString(), startIndex, |
176 | |
currentIndex, ReverseRExpression.LENGTH); |
177 | |
} |
178 | |
} |
179 | 50 | if (currentIndex == mask.length) { |
180 | 0 | throw new ReverseREParseException("expression does not end with } character", mask.toString(), startIndex, |
181 | |
currentIndex, ReverseRExpression.LENGTH); |
182 | |
} |
183 | 50 | if (mask[currentIndex] == '}') { |
184 | 50 | currentIndex++; |
185 | |
} |
186 | 50 | long startLength = Long.parseLong(startLengthString.toString()); |
187 | |
long endLength; |
188 | |
|
189 | 50 | if (!startMode) { |
190 | 20 | endLength = Long.parseLong(endLengthString.toString()); |
191 | |
} else { |
192 | 30 | endLength = startLength; |
193 | |
} |
194 | 50 | this.expressions.put(new Integer(nr), new ReverseLengthRExpression(startLength, endLength)); |
195 | 50 | return currentIndex; |
196 | |
} |
197 | |
|
198 | |
|
199 | |
|
200 | |
|
201 | |
|
202 | |
|
203 | |
|
204 | |
|
205 | |
|
206 | |
|
207 | |
private int createRangeExpresssion(int nr, char[] mask, int startIndex) throws ReverseREParseException { |
208 | 60 | int currentIndex = startIndex; |
209 | 60 | HashSet<Character> rangeSet = new HashSet<Character>(); |
210 | |
|
211 | 150 | while (mask[currentIndex] != ']' && currentIndex < mask.length) { |
212 | 90 | char beginChar = mask[currentIndex]; |
213 | |
|
214 | 90 | if (currentIndex + 1 < mask.length) { |
215 | |
|
216 | 90 | if (mask[currentIndex + 1] == '-') { |
217 | |
|
218 | 70 | if (currentIndex + 2 < mask.length) { |
219 | 70 | char endChar = mask[currentIndex + 2]; |
220 | |
|
221 | 70 | addCharacterRange(rangeSet, beginChar, endChar); |
222 | 70 | currentIndex += 3; |
223 | |
|
224 | 70 | } else { |
225 | |
|
226 | 0 | throw new ReverseREParseException( |
227 | |
"Expression not valid. Ends with '-'; should end with an alphanumeric value.", mask |
228 | |
.toString(), startIndex, currentIndex, ReverseRExpression.RANGE); |
229 | |
} |
230 | |
} else { |
231 | |
|
232 | 20 | currentIndex++; |
233 | 20 | rangeSet.add(beginChar); |
234 | |
} |
235 | |
} else { |
236 | |
|
237 | 0 | currentIndex++; |
238 | 0 | rangeSet.add(beginChar); |
239 | |
} |
240 | 90 | } |
241 | 60 | if (currentIndex == mask.length) { |
242 | 0 | throw new ReverseREParseException("expression doens not end with ] character", mask.toString(), startIndex, |
243 | |
currentIndex, ReverseRExpression.LENGTH); |
244 | |
} |
245 | 60 | if (mask[currentIndex] == ']') { |
246 | 60 | currentIndex++; |
247 | |
} |
248 | 60 | this.expressions.put(nr, new ReverseRangeRExpression(rangeSet)); |
249 | 60 | return currentIndex; |
250 | |
} |
251 | |
|
252 | |
|
253 | |
|
254 | |
|
255 | |
|
256 | |
|
257 | |
|
258 | |
|
259 | |
private void addCharacterRange(HashSet<Character> rangeSet, char beginChar, char endChar) { |
260 | |
|
261 | 70 | if (Character.isDigit(beginChar) && Character.isDigit(endChar)) { |
262 | 30 | addRange(rangeSet, beginChar, endChar); |
263 | 40 | } else if ((Character.isLetter(beginChar) && Character.isDigit(endChar)) |
264 | |
|| (Character.isDigit(beginChar) && Character.isLetter(endChar))) { |
265 | |
|
266 | |
} |
267 | |
|
268 | 40 | else if ((Character.isLowerCase(beginChar) && Character.isLowerCase(endChar)) |
269 | |
|| (Character.isUpperCase(beginChar) && Character.isUpperCase(endChar))) { |
270 | 40 | addRange(rangeSet, beginChar, endChar); |
271 | 0 | } else if (Character.isLowerCase(beginChar) && Character.isUpperCase(endChar)) { |
272 | 0 | addRange(rangeSet, beginChar, 'z'); |
273 | 0 | addRange(rangeSet, 'A', endChar); |
274 | 0 | } else if (Character.isUpperCase(beginChar) && Character.isLowerCase(endChar)) { |
275 | 0 | addRange(rangeSet, beginChar, 'Z'); |
276 | 0 | addRange(rangeSet, 'a', endChar); |
277 | |
} else { |
278 | |
|
279 | |
} |
280 | 70 | } |
281 | |
|
282 | |
private void addRange(HashSet<Character> rangeSet, char beginChar, char endChar) { |
283 | 1410 | for (char ci = beginChar; ci <= endChar; ci++) { |
284 | 1340 | rangeSet.add(new Character(ci)); |
285 | |
} |
286 | 70 | } |
287 | |
} |