1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.codec.language;
19
20 import java.util.Arrays;
21 import java.util.Locale;
22
23 import org.apache.commons.codec.EncoderException;
24 import org.apache.commons.codec.StringEncoder;
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183 public class ColognePhonetic implements StringEncoder {
184
185
186
187
188
189
190 abstract static class CologneBuffer {
191
192 protected final char[] data;
193
194 protected int length;
195
196 CologneBuffer(final char[] data) {
197 this.data = data;
198 this.length = data.length;
199 }
200
201 CologneBuffer(final int buffSize) {
202 this.data = new char[buffSize];
203 this.length = 0;
204 }
205
206 protected abstract char[] copyData(int start, int length);
207
208 public boolean isEmpty() {
209 return length() == 0;
210 }
211
212 public int length() {
213 return length;
214 }
215
216 @Override
217 public String toString() {
218 return new String(copyData(0, length));
219 }
220 }
221
222 private final class CologneInputBuffer extends CologneBuffer {
223
224 CologneInputBuffer(final char[] data) {
225 super(data);
226 }
227
228 @Override
229 protected char[] copyData(final int start, final int length) {
230 final char[] newData = new char[length];
231 System.arraycopy(data, data.length - this.length + start, newData, 0, length);
232 return newData;
233 }
234
235 public char getNextChar() {
236 return data[getNextPos()];
237 }
238
239 protected int getNextPos() {
240 return data.length - length;
241 }
242
243 public char removeNext() {
244 final char ch = getNextChar();
245 length--;
246 return ch;
247 }
248 }
249
250 private final class CologneOutputBuffer extends CologneBuffer {
251
252 private char lastCode;
253
254 CologneOutputBuffer(final int buffSize) {
255 super(buffSize);
256 lastCode = '/';
257 }
258
259 @Override
260 protected char[] copyData(final int start, final int length) {
261 return Arrays.copyOfRange(data, start, length);
262 }
263
264
265
266
267
268
269
270
271
272 public void put(final char code) {
273 if (code != CHAR_IGNORE && lastCode != code && (code != '0' || length == 0)) {
274 data[length] = code;
275 length++;
276 }
277 lastCode = code;
278 }
279 }
280
281 private static final char[] AEIJOUY = { 'A', 'E', 'I', 'J', 'O', 'U', 'Y' };
282 private static final char[] CSZ = { 'C', 'S', 'Z' };
283 private static final char[] FPVW = { 'F', 'P', 'V', 'W' };
284 private static final char[] GKQ = { 'G', 'K', 'Q' };
285 private static final char[] CKQ = { 'C', 'K', 'Q' };
286 private static final char[] AHKLOQRUX = { 'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X' };
287
288 private static final char[] SZ = { 'S', 'Z' };
289
290 private static final char[] AHKOQUX = { 'A', 'H', 'K', 'O', 'Q', 'U', 'X' };
291
292 private static final char[] DTX = { 'D', 'T', 'X' };
293
294 private static final char CHAR_IGNORE = '-';
295
296
297
298
299 private static boolean arrayContains(final char[] arr, final char key) {
300 for (final char element : arr) {
301 if (element == key) {
302 return true;
303 }
304 }
305 return false;
306 }
307
308
309
310
311 public ColognePhonetic() {
312
313 }
314
315
316
317
318
319
320
321
322
323
324
325
326 public String colognePhonetic(final String text) {
327 if (text == null) {
328 return null;
329 }
330
331 final CologneInputBuffer input = new CologneInputBuffer(preprocess(text));
332 final CologneOutputBuffer output = new CologneOutputBuffer(input.length() * 2);
333
334 char nextChar;
335
336 char lastChar = CHAR_IGNORE;
337 char chr;
338
339 while (!input.isEmpty()) {
340 chr = input.removeNext();
341
342 if (!input.isEmpty()) {
343 nextChar = input.getNextChar();
344 } else {
345 nextChar = CHAR_IGNORE;
346 }
347
348 if (chr < 'A' || chr > 'Z') {
349 continue;
350 }
351
352 if (arrayContains(AEIJOUY, chr)) {
353 output.put('0');
354 } else if (chr == 'B' || chr == 'P' && nextChar != 'H') {
355 output.put('1');
356 } else if ((chr == 'D' || chr == 'T') && !arrayContains(CSZ, nextChar)) {
357 output.put('2');
358 } else if (arrayContains(FPVW, chr)) {
359 output.put('3');
360 } else if (arrayContains(GKQ, chr)) {
361 output.put('4');
362 } else if (chr == 'X' && !arrayContains(CKQ, lastChar)) {
363 output.put('4');
364 output.put('8');
365 } else if (chr == 'S' || chr == 'Z') {
366 output.put('8');
367 } else if (chr == 'C') {
368 if (output.isEmpty()) {
369 if (arrayContains(AHKLOQRUX, nextChar)) {
370 output.put('4');
371 } else {
372 output.put('8');
373 }
374 } else if (arrayContains(SZ, lastChar) || !arrayContains(AHKOQUX, nextChar)) {
375 output.put('8');
376 } else {
377 output.put('4');
378 }
379 } else if (arrayContains(DTX, chr)) {
380 output.put('8');
381 } else {
382 switch (chr) {
383 case 'R':
384 output.put('7');
385 break;
386 case 'L':
387 output.put('5');
388 break;
389 case 'M':
390 case 'N':
391 output.put('6');
392 break;
393 case 'H':
394 output.put(CHAR_IGNORE);
395 break;
396 default:
397 break;
398 }
399 }
400
401 lastChar = chr;
402 }
403 return output.toString();
404 }
405
406 @Override
407 public Object encode(final Object object) throws EncoderException {
408 if (!(object instanceof String)) {
409 throw new EncoderException("This method's parameter was expected to be of the type " +
410 String.class.getName() +
411 ". But actually it was of the type " +
412 object.getClass().getName() +
413 ".");
414 }
415 return encode((String) object);
416 }
417
418 @Override
419 public String encode(final String text) {
420 return colognePhonetic(text);
421 }
422
423
424
425
426
427
428
429
430
431 public boolean isEncodeEqual(final String text1, final String text2) {
432 return colognePhonetic(text1).equals(colognePhonetic(text2));
433 }
434
435
436
437
438
439
440
441
442
443
444
445 private char[] preprocess(final String text) {
446
447 final char[] chrs = text.toUpperCase(Locale.GERMAN).toCharArray();
448
449 for (int index = 0; index < chrs.length; index++) {
450 switch (chrs[index]) {
451 case '\u00C4':
452 chrs[index] = 'A';
453 break;
454 case '\u00DC':
455 chrs[index] = 'U';
456 break;
457 case '\u00D6':
458 chrs[index] = 'O';
459 break;
460 default:
461 break;
462 }
463 }
464 return chrs;
465 }
466 }