1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.codec.language;
19
20 import java.util.Arrays;
21 import java.util.Locale;
22
23 import org.apache.commons.codec.EncoderException;
24 import org.apache.commons.codec.StringEncoder;
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183 public class ColognePhonetic implements StringEncoder {
184
185
186
187
188
189
190 abstract static class CologneBuffer {
191
192 protected final char[] data;
193
194 protected int length;
195
196 public CologneBuffer(final char[] data) {
197 this.data = data;
198 this.length = data.length;
199 }
200
201 public CologneBuffer(final int buffSize) {
202 this.data = new char[buffSize];
203 this.length = 0;
204 }
205
206 protected abstract char[] copyData(int start, int length);
207
208 public boolean isEmpty() {
209 return length() == 0;
210 }
211
212 public int length() {
213 return length;
214 }
215
216 @Override
217 public String toString() {
218 return new String(copyData(0, length));
219 }
220 }
221 private final class CologneInputBuffer extends CologneBuffer {
222
223 public CologneInputBuffer(final char[] data) {
224 super(data);
225 }
226
227 @Override
228 protected char[] copyData(final int start, final int length) {
229 final char[] newData = new char[length];
230 System.arraycopy(data, data.length - this.length + start, newData, 0, length);
231 return newData;
232 }
233
234 public char getNextChar() {
235 return data[getNextPos()];
236 }
237
238 protected int getNextPos() {
239 return data.length - length;
240 }
241
242 public char removeNext() {
243 final char ch = getNextChar();
244 length--;
245 return ch;
246 }
247 }
248 private final class CologneOutputBuffer extends CologneBuffer {
249
250 private char lastCode;
251
252 public CologneOutputBuffer(final int buffSize) {
253 super(buffSize);
254 lastCode = '/';
255 }
256
257 @Override
258 protected char[] copyData(final int start, final int length) {
259 return Arrays.copyOfRange(data, start, length);
260 }
261
262
263
264
265
266
267
268
269
270 public void put(final char code) {
271 if (code != CHAR_IGNORE && lastCode != code && (code != '0' || length == 0)) {
272 data[length] = code;
273 length++;
274 }
275 lastCode = code;
276 }
277 }
278
279 private static final char[] AEIJOUY = { 'A', 'E', 'I', 'J', 'O', 'U', 'Y' };
280 private static final char[] CSZ = { 'C', 'S', 'Z' };
281 private static final char[] FPVW = { 'F', 'P', 'V', 'W' };
282 private static final char[] GKQ = { 'G', 'K', 'Q' };
283 private static final char[] CKQ = { 'C', 'K', 'Q' };
284 private static final char[] AHKLOQRUX = { 'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X' };
285
286 private static final char[] SZ = { 'S', 'Z' };
287
288 private static final char[] AHKOQUX = { 'A', 'H', 'K', 'O', 'Q', 'U', 'X' };
289
290 private static final char[] DTX = { 'D', 'T', 'X' };
291
292 private static final char CHAR_IGNORE = '-';
293
294
295
296
297 private static boolean arrayContains(final char[] arr, final char key) {
298 for (final char element : arr) {
299 if (element == key) {
300 return true;
301 }
302 }
303 return false;
304 }
305
306
307
308
309
310
311
312
313
314
315
316
317 public String colognePhonetic(final String text) {
318 if (text == null) {
319 return null;
320 }
321
322 final CologneInputBuffer input = new CologneInputBuffer(preprocess(text));
323 final CologneOutputBuffer output = new CologneOutputBuffer(input.length() * 2);
324
325 char nextChar;
326
327 char lastChar = CHAR_IGNORE;
328 char chr;
329
330 while (!input.isEmpty()) {
331 chr = input.removeNext();
332
333 if (!input.isEmpty()) {
334 nextChar = input.getNextChar();
335 } else {
336 nextChar = CHAR_IGNORE;
337 }
338
339 if (chr < 'A' || chr > 'Z') {
340 continue;
341 }
342
343 if (arrayContains(AEIJOUY, chr)) {
344 output.put('0');
345 } else if (chr == 'B' || chr == 'P' && nextChar != 'H') {
346 output.put('1');
347 } else if ((chr == 'D' || chr == 'T') && !arrayContains(CSZ, nextChar)) {
348 output.put('2');
349 } else if (arrayContains(FPVW, chr)) {
350 output.put('3');
351 } else if (arrayContains(GKQ, chr)) {
352 output.put('4');
353 } else if (chr == 'X' && !arrayContains(CKQ, lastChar)) {
354 output.put('4');
355 output.put('8');
356 } else if (chr == 'S' || chr == 'Z') {
357 output.put('8');
358 } else if (chr == 'C') {
359 if (output.isEmpty()) {
360 if (arrayContains(AHKLOQRUX, nextChar)) {
361 output.put('4');
362 } else {
363 output.put('8');
364 }
365 } else if (arrayContains(SZ, lastChar) || !arrayContains(AHKOQUX, nextChar)) {
366 output.put('8');
367 } else {
368 output.put('4');
369 }
370 } else if (arrayContains(DTX, chr)) {
371 output.put('8');
372 } else {
373 switch (chr) {
374 case 'R':
375 output.put('7');
376 break;
377 case 'L':
378 output.put('5');
379 break;
380 case 'M':
381 case 'N':
382 output.put('6');
383 break;
384 case 'H':
385 output.put(CHAR_IGNORE);
386 break;
387 default:
388 break;
389 }
390 }
391
392 lastChar = chr;
393 }
394 return output.toString();
395 }
396
397 @Override
398 public Object encode(final Object object) throws EncoderException {
399 if (!(object instanceof String)) {
400 throw new EncoderException("This method's parameter was expected to be of the type " +
401 String.class.getName() +
402 ". But actually it was of the type " +
403 object.getClass().getName() +
404 ".");
405 }
406 return encode((String) object);
407 }
408
409 @Override
410 public String encode(final String text) {
411 return colognePhonetic(text);
412 }
413
414
415
416
417
418
419
420
421
422 public boolean isEncodeEqual(final String text1, final String text2) {
423 return colognePhonetic(text1).equals(colognePhonetic(text2));
424 }
425
426
427
428
429
430
431
432
433
434
435
436 private char[] preprocess(final String text) {
437
438 final char[] chrs = text.toUpperCase(Locale.GERMAN).toCharArray();
439
440 for (int index = 0; index < chrs.length; index++) {
441 switch (chrs[index]) {
442 case '\u00C4':
443 chrs[index] = 'A';
444 break;
445 case '\u00DC':
446 chrs[index] = 'U';
447 break;
448 case '\u00D6':
449 chrs[index] = 'O';
450 break;
451 default:
452 break;
453 }
454 }
455 return chrs;
456 }
457 }