View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.language;
19  
20  import org.apache.commons.codec.EncoderException;
21  import org.apache.commons.codec.StringEncoder;
22  import org.apache.commons.codec.binary.StringUtils;
23  
24  /**
25   * Encodes a string into a double metaphone value. This Implementation is based on the algorithm by <CITE>Lawrence
26   * Philips</CITE>.
27   * <p>
28   * This class is conditionally thread-safe. The instance field for the maximum code length is mutable
29   * {@link #setMaxCodeLen(int)} but is not volatile, and accesses are not synchronized. If an instance of the class is
30   * shared between threads, the caller needs to ensure that suitable synchronization is used to ensure safe publication
31   * of the value between threads, and must not invoke {@link #setMaxCodeLen(int)} after initial setup.
32   * </p>
33   *
34   * @see <a href="https://drdobbs.com/the-double-metaphone-search-algorithm/184401251?pgno=2">Dr. Dobbs Original Article</a>
35   * @see <a href="https://en.wikipedia.org/wiki/Metaphone">Wikipedia Metaphone</a>
36   */
37  public class DoubleMetaphone implements StringEncoder {
38  
39      /**
40       * Stores results, since there is the optional alternate encoding.
41       */
42      public class DoubleMetaphoneResult {
43  
44          private final StringBuilder primary = new StringBuilder(getMaxCodeLen());
45          private final StringBuilder alternate = new StringBuilder(getMaxCodeLen());
46          private final int maxLength;
47  
48          /**
49           * Constructs a new instance.
50           *
51           * @param maxLength The maximum length.
52           */
53          public DoubleMetaphoneResult(final int maxLength) {
54              this.maxLength = maxLength;
55          }
56  
57          /**
58           * Appends the given value as primary and alternative.
59           *
60           * @param value The value to append.
61           */
62          public void append(final char value) {
63              appendPrimary(value);
64              appendAlternate(value);
65          }
66  
67          /**
68           * Appends the given primary and alternative values.
69           *
70           * @param primary   The primary value.
71           * @param alternate The alternate value.
72           */
73          public void append(final char primary, final char alternate) {
74              appendPrimary(primary);
75              appendAlternate(alternate);
76          }
77  
78          /**
79           * Appends the given value as primary and alternative.
80           *
81           * @param value The value to append.
82           */
83          public void append(final String value) {
84              appendPrimary(value);
85              appendAlternate(value);
86          }
87  
88          /**
89           * Appends the given primary and alternative values.
90           *
91           * @param primary   The primary value.
92           * @param alternate The alternate value.
93           */
94          public void append(final String primary, final String alternate) {
95              appendPrimary(primary);
96              appendAlternate(alternate);
97          }
98  
99          /**
100          * Appends the given value as alternative.
101          *
102          * @param value The value to append.
103          */
104         public void appendAlternate(final char value) {
105             if (this.alternate.length() < this.maxLength) {
106                 this.alternate.append(value);
107             }
108         }
109 
110         /**
111          * Appends the given value as alternative.
112          *
113          * @param value The value to append.
114          */
115         public void appendAlternate(final String value) {
116             final int addChars = this.maxLength - this.alternate.length();
117             if (value.length() <= addChars) {
118                 this.alternate.append(value);
119             } else {
120                 this.alternate.append(value, 0, addChars);
121             }
122         }
123 
124         /**
125          * Appends the given value as primary.
126          *
127          * @param value The value to append.
128          */
129         public void appendPrimary(final char value) {
130             if (this.primary.length() < this.maxLength) {
131                 this.primary.append(value);
132             }
133         }
134 
135         /**
136          * Appends the given value as primary.
137          *
138          * @param value The value to append.
139          */
140         public void appendPrimary(final String value) {
141             final int addChars = this.maxLength - this.primary.length();
142             if (value.length() <= addChars) {
143                 this.primary.append(value);
144             } else {
145                 this.primary.append(value, 0, addChars);
146             }
147         }
148 
149         /**
150          * Gets the alternate string.
151          *
152          * @return the alternate string.
153          */
154         public String getAlternate() {
155             return this.alternate.toString();
156         }
157 
158         /**
159          * Gets the primary string.
160          *
161          * @return the primary string.
162          */
163         public String getPrimary() {
164             return this.primary.toString();
165         }
166 
167         /**
168          * Tests whether this result is complete.
169          *
170          * @return whether this result is complete.
171          */
172         public boolean isComplete() {
173             return this.primary.length() >= this.maxLength && this.alternate.length() >= this.maxLength;
174         }
175     }
176 
    /**
     * "Vowels" to test.
     */
    private static final String VOWELS = "AEIOUY";

    /**
     * Prefixes when present which are not pronounced.
     */
    private static final String[] SILENT_START = { "GN", "KN", "PN", "WR", "PS" };

    /**
     * Single characters (including a space) tested against the character following "CH" in {@code conditionCH1}.
     */
    private static final String[] L_R_N_M_B_H_F_V_W_SPACE = { "L", "R", "N", "M", "B", "H", "F", "V", "W", " " };

    /**
     * Two-letter sequences tested against the characters following a leading 'G' in {@code handleG}.
     */
    private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER = { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" };

    /**
     * Single characters tested against the character following 'J' in {@code handleJ}.
     */
    private static final String[] L_T_K_S_N_M_B_Z = { "L", "T", "K", "S", "N", "M", "B", "Z" };
190 
191     /**
192      * Tests whether {@code value} contains any of the {@code criteria} starting at index {@code start} and matching up to length {@code length}.
193      *
194      * @param value    The value to test.
195      * @param start    Where in {@code value} to start testing.
196      * @param length   How many to test.
197      * @param criteria The search criteria.
198      * @return Whether there was a match.
199      */
200     protected static boolean contains(final String value, final int start, final int length, final String... criteria) {
201         boolean result = false;
202         if (start >= 0 && start + length <= value.length()) {
203             final String target = value.substring(start, start + length);
204             for (final String element : criteria) {
205                 if (target.equals(element)) {
206                     result = true;
207                     break;
208                 }
209             }
210         }
211         return result;
212     }
213 
    /**
     * Maximum length of an encoding, default is 4.
     * <p>
     * Read through {@link #getMaxCodeLen()} when a new result is created for each encoding.
     * </p>
     */
    private int maxCodeLen = 4;
218 
    /**
     * Constructs a new instance with the default maximum code length of 4.
     */
    public DoubleMetaphone() {
        // empty
    }
225 
226     /**
227      * Gets the character at index {@code index} if available, or {@link Character#MIN_VALUE} if out of bounds.
228      *
229      * @param value The String to query.
230      * @param index A string index.
231      * @return The character at the index or {@link Character#MIN_VALUE} if out of bounds.
232      */
233     protected char charAt(final String value, final int index) {
234         if (index < 0 || index >= value.length()) {
235             return Character.MIN_VALUE;
236         }
237         return value.charAt(index);
238     }
239 
240     /**
241      * Cleans the input.
242      */
243     private String cleanInput(String input) {
244         if (input == null) {
245             return null;
246         }
247         input = input.trim();
248         if (input.isEmpty()) {
249             return null;
250         }
251         return input.toUpperCase(java.util.Locale.ENGLISH);
252     }
253 
254     /**
255      * Complex condition 0 for 'C'.
256      */
257     private boolean conditionC0(final String value, final int index) {
258         if (contains(value, index, 4, "CHIA")) {
259             return true;
260         }
261         if (index <= 1) {
262             return false;
263         }
264         if (isVowel(charAt(value, index - 2))) {
265             return false;
266         }
267         if (!contains(value, index - 1, 3, "ACH")) {
268             return false;
269         }
270         final char c = charAt(value, index + 2);
271         return c != 'I' && c != 'E' ||
272                 contains(value, index - 2, 6, "BACHER", "MACHER");
273     }
274 
275     /**
276      * Complex condition 0 for 'CH'.
277      */
278     private boolean conditionCH0(final String value, final int index) {
279         if (index != 0) {
280             return false;
281         }
282         if (!contains(value, index + 1, 5, "HARAC", "HARIS") &&
283                    !contains(value, index + 1, 3, "HOR", "HYM", "HIA", "HEM")) {
284             return false;
285         }
286         return !contains(value, 0, 5, "CHORE");
287     }
288 
289     /**
290      * Complex condition 1 for 'CH'.
291      */
292     private boolean conditionCH1(final String value, final int index) {
293         return contains(value, 0, 4, "VAN ", "VON ") || contains(value, 0, 3, "SCH") ||
294                 contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") ||
295                 contains(value, index + 2, 1, "T", "S") ||
296                 (contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) &&
297                  (contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.length() - 1);
298     }
299 
300     /**
301      * Complex condition 0 for 'L'.
302      */
303     private boolean conditionL0(final String value, final int index) {
304         if (index == value.length() - 3 &&
305             contains(value, index - 1, 4, "ILLO", "ILLA", "ALLE")) {
306             return true;
307         }
308         return (contains(value, value.length() - 2, 2, "AS", "OS") ||
309                 contains(value, value.length() - 1, 1, "A", "O")) &&
310                 contains(value, index - 1, 4, "ALLE");
311     }
312 
313     //-- BEGIN HANDLERS --//
314 
315     /**
316      * Complex condition 0 for 'M'.
317      */
318     private boolean conditionM0(final String value, final int index) {
319         if (charAt(value, index + 1) == 'M') {
320             return true;
321         }
322         return contains(value, index - 1, 3, "UMB") &&
323                (index + 1 == value.length() - 1 || contains(value, index + 2, 2, "ER"));
324     }
325 
326     /**
327      * Encode a value with Double Metaphone.
328      *
329      * @param value String to encode
330      * @return an encoded string
331      */
332     public String doubleMetaphone(final String value) {
333         return doubleMetaphone(value, false);
334     }
335 
336     /**
337      * Encode a value with Double Metaphone, optionally using the alternate encoding.
338      *
339      * @param value String to encode
340      * @param alternate use alternate encode
341      * @return an encoded string
342      */
343     public String doubleMetaphone(String value, final boolean alternate) {
344         value = cleanInput(value);
345         if (value == null) {
346             return null;
347         }
348 
349         final boolean slavoGermanic = isSlavoGermanic(value);
350         int index = isSilentStart(value) ? 1 : 0;
351 
352         final DoubleMetaphoneResult result = new DoubleMetaphoneResult(getMaxCodeLen());
353 
354         while (!result.isComplete() && index <= value.length() - 1) {
355             switch (value.charAt(index)) {
356             case 'A':
357             case 'E':
358             case 'I':
359             case 'O':
360             case 'U':
361             case 'Y':
362                 index = handleAEIOUY(result, index);
363                 break;
364             case 'B':
365                 result.append('P');
366                 index = charAt(value, index + 1) == 'B' ? index + 2 : index + 1;
367                 break;
368             case '\u00C7':
369                 // A C with a Cedilla
370                 result.append('S');
371                 index++;
372                 break;
373             case 'C':
374                 index = handleC(value, result, index);
375                 break;
376             case 'D':
377                 index = handleD(value, result, index);
378                 break;
379             case 'F':
380                 result.append('F');
381                 index = charAt(value, index + 1) == 'F' ? index + 2 : index + 1;
382                 break;
383             case 'G':
384                 index = handleG(value, result, index, slavoGermanic);
385                 break;
386             case 'H':
387                 index = handleH(value, result, index);
388                 break;
389             case 'J':
390                 index = handleJ(value, result, index, slavoGermanic);
391                 break;
392             case 'K':
393                 result.append('K');
394                 index = charAt(value, index + 1) == 'K' ? index + 2 : index + 1;
395                 break;
396             case 'L':
397                 index = handleL(value, result, index);
398                 break;
399             case 'M':
400                 result.append('M');
401                 index = conditionM0(value, index) ? index + 2 : index + 1;
402                 break;
403             case 'N':
404                 result.append('N');
405                 index = charAt(value, index + 1) == 'N' ? index + 2 : index + 1;
406                 break;
407             case '\u00D1':
408                 // N with a tilde (spanish ene)
409                 result.append('N');
410                 index++;
411                 break;
412             case 'P':
413                 index = handleP(value, result, index);
414                 break;
415             case 'Q':
416                 result.append('K');
417                 index = charAt(value, index + 1) == 'Q' ? index + 2 : index + 1;
418                 break;
419             case 'R':
420                 index = handleR(value, result, index, slavoGermanic);
421                 break;
422             case 'S':
423                 index = handleS(value, result, index, slavoGermanic);
424                 break;
425             case 'T':
426                 index = handleT(value, result, index);
427                 break;
428             case 'V':
429                 result.append('F');
430                 index = charAt(value, index + 1) == 'V' ? index + 2 : index + 1;
431                 break;
432             case 'W':
433                 index = handleW(value, result, index);
434                 break;
435             case 'X':
436                 index = handleX(value, result, index);
437                 break;
438             case 'Z':
439                 index = handleZ(value, result, index, slavoGermanic);
440                 break;
441             default:
442                 index++;
443                 break;
444             }
445         }
446 
447         return alternate ? result.getAlternate() : result.getPrimary();
448     }
449 
450     /**
451      * Encode the value using DoubleMetaphone.  It will only work if
452      * {@code obj} is a {@code String} (like {@code Metaphone}).
453      *
454      * @param obj Object to encode (should be of type String)
455      * @return An encoded Object (will be of type String)
456      * @throws EncoderException encode parameter is not of type String
457      */
458     @Override
459     public Object encode(final Object obj) throws EncoderException {
460         if (!(obj instanceof String)) {
461             throw new EncoderException("DoubleMetaphone encode parameter is not of type String");
462         }
463         return doubleMetaphone((String) obj);
464     }
465 
466     /**
467      * Encode the value using DoubleMetaphone.
468      *
469      * @param value String to encode
470      * @return An encoded String
471      */
472     @Override
473     public String encode(final String value) {
474         return doubleMetaphone(value);
475     }
476 
477     /**
478      * Returns the maxCodeLen.
479      * @return int
480      */
481     public int getMaxCodeLen() {
482         return this.maxCodeLen;
483     }
484 
485     /**
486      * Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases.
487      */
488     private int handleAEIOUY(final DoubleMetaphoneResult result, final int index) {
489         if (index == 0) {
490             result.append('A');
491         }
492         return index + 1;
493     }
494 
495     /**
496      * Handles 'C' cases.
497      */
498     private int handleC(final String value, final DoubleMetaphoneResult result, int index) {
499         if (conditionC0(value, index)) {  // very confusing, moved out
500             result.append('K');
501             index += 2;
502         } else if (index == 0 && contains(value, index, 6, "CAESAR")) {
503             result.append('S');
504             index += 2;
505         } else if (contains(value, index, 2, "CH")) {
506             index = handleCH(value, result, index);
507         } else if (contains(value, index, 2, "CZ") &&
508                    !contains(value, index - 2, 4, "WICZ")) {
509             //-- "Czerny" --//
510             result.append('S', 'X');
511             index += 2;
512         } else if (contains(value, index + 1, 3, "CIA")) {
513             //-- "focaccia" --//
514             result.append('X');
515             index += 3;
516         } else if (contains(value, index, 2, "CC") &&
517                    !(index == 1 && charAt(value, 0) == 'M')) {
518             //-- double "cc" but not "McClelland" --//
519             return handleCC(value, result, index);
520         } else if (contains(value, index, 2, "CK", "CG", "CQ")) {
521             result.append('K');
522             index += 2;
523         } else if (contains(value, index, 2, "CI", "CE", "CY")) {
524             //-- Italian vs. English --//
525             if (contains(value, index, 3, "CIO", "CIE", "CIA")) {
526                 result.append('S', 'X');
527             } else {
528                 result.append('S');
529             }
530             index += 2;
531         } else {
532             result.append('K');
533             if (contains(value, index + 1, 2, " C", " Q", " G")) {
534                 //-- Mac Caffrey, Mac Gregor --//
535                 index += 3;
536             } else if (contains(value, index + 1, 1, "C", "K", "Q") &&
537                        !contains(value, index + 1, 2, "CE", "CI")) {
538                 index += 2;
539             } else {
540                 index++;
541             }
542         }
543 
544         return index;
545     }
546 
547     /**
548      * Handles 'CC' cases.
549      */
550     private int handleCC(final String value, final DoubleMetaphoneResult result, int index) {
551         if (contains(value, index + 2, 1, "I", "E", "H") &&
552             !contains(value, index + 2, 2, "HU")) {
553             //-- "bellocchio" but not "bacchus" --//
554             if (index == 1 && charAt(value, index - 1) == 'A' ||
555                 contains(value, index - 1, 5, "UCCEE", "UCCES")) {
556                 //-- "accident", "accede", "succeed" --//
557                 result.append("KS");
558             } else {
559                 //-- "bacci", "bertucci", other Italian --//
560                 result.append('X');
561             }
562             index += 3;
563         } else {    // Pierce's rule
564             result.append('K');
565             index += 2;
566         }
567 
568         return index;
569     }
570 
571     /**
572      * Handles 'CH' cases.
573      */
574     private int handleCH(final String value, final DoubleMetaphoneResult result, final int index) {
575         if (index > 0 && contains(value, index, 4, "CHAE")) {   // Michael
576             result.append('K', 'X');
577             return index + 2;
578         }
579         if (conditionCH0(value, index)) {
580             //-- Greek roots ("chemistry", "chorus", etc.) --//
581             result.append('K');
582             return index + 2;
583         }
584         if (conditionCH1(value, index)) {
585             //-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --//
586             result.append('K');
587             return index + 2;
588         }
589         if (index > 0) {
590             if (contains(value, 0, 2, "MC")) {
591                 result.append('K');
592             } else {
593                 result.append('X', 'K');
594             }
595         } else {
596             result.append('X');
597         }
598         return index + 2;
599     }
600 
601     /**
602      * Handles 'D' cases.
603      */
604     private int handleD(final String value, final DoubleMetaphoneResult result, int index) {
605         if (contains(value, index, 2, "DG")) {
606             //-- "Edge" --//
607             if (contains(value, index + 2, 1, "I", "E", "Y")) {
608                 result.append('J');
609                 index += 3;
610                 //-- "Edgar" --//
611             } else {
612                 result.append("TK");
613                 index += 2;
614             }
615         } else if (contains(value, index, 2, "DT", "DD")) {
616             result.append('T');
617             index += 2;
618         } else {
619             result.append('T');
620             index++;
621         }
622         return index;
623     }
624 
625     /**
626      * Handles 'G' cases.
627      */
628     private int handleG(final String value, final DoubleMetaphoneResult result, int index,
629                         final boolean slavoGermanic) {
630         if (charAt(value, index + 1) == 'H') {
631             index = handleGH(value, result, index);
632         } else if (charAt(value, index + 1) == 'N') {
633             if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic) {
634                 result.append("KN", "N");
635             } else if (!contains(value, index + 2, 2, "EY") &&
636                        charAt(value, index + 1) != 'Y' && !slavoGermanic) {
637                 result.append("N", "KN");
638             } else {
639                 result.append("KN");
640             }
641             index += 2;
642         } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) {
643             result.append("KL", "L");
644             index += 2;
645         } else if (index == 0 &&
646                    (charAt(value, index + 1) == 'Y' ||
647                     contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) {
648             //-- -ges-, -gep-, -gel-, -gie- at beginning --//
649             result.append('K', 'J');
650             index += 2;
651         } else if ((contains(value, index + 1, 2, "ER") ||
652                     charAt(value, index + 1) == 'Y') &&
653                    !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") &&
654                    !contains(value, index - 1, 1, "E", "I") &&
655                    !contains(value, index - 1, 3, "RGY", "OGY")) {
656             //-- -ger-, -gy- --//
657             result.append('K', 'J');
658             index += 2;
659         } else if (contains(value, index + 1, 1, "E", "I", "Y") ||
660                    contains(value, index - 1, 4, "AGGI", "OGGI")) {
661             //-- Italian "biaggi" --//
662             if (contains(value, 0, 4, "VAN ", "VON ") ||
663                 contains(value, 0, 3, "SCH") ||
664                 contains(value, index + 1, 2, "ET")) {
665                 //-- obvious germanic --//
666                 result.append('K');
667             } else if (contains(value, index + 1, 3, "IER")) {
668                 result.append('J');
669             } else {
670                 result.append('J', 'K');
671             }
672             index += 2;
673         } else {
674             if (charAt(value, index + 1) == 'G') {
675                 index += 2;
676             } else {
677                 index++;
678             }
679             result.append('K');
680         }
681         return index;
682     }
683 
684     /**
685      * Handles 'GH' cases.
686      */
687     private int handleGH(final String value, final DoubleMetaphoneResult result, int index) {
688         if (index > 0 && !isVowel(charAt(value, index - 1))) {
689             result.append('K');
690             index += 2;
691         } else if (index == 0) {
692             if (charAt(value, index + 2) == 'I') {
693                 result.append('J');
694             } else {
695                 result.append('K');
696             }
697             index += 2;
698         } else if (index > 1 && contains(value, index - 2, 1, "B", "H", "D") ||
699                    index > 2 && contains(value, index - 3, 1, "B", "H", "D") ||
700                    index > 3 && contains(value, index - 4, 1, "B", "H")) {
701             //-- Parker's rule (with some further refinements) - "hugh"
702             index += 2;
703         } else {
704             if (index > 2 && charAt(value, index - 1) == 'U' &&
705                 contains(value, index - 3, 1, "C", "G", "L", "R", "T")) {
706                 //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough"
707                 result.append('F');
708             } else if (index > 0 && charAt(value, index - 1) != 'I') {
709                 result.append('K');
710             }
711             index += 2;
712         }
713         return index;
714     }
715 
716     /**
717      * Handles 'H' cases.
718      */
719     private int handleH(final String value, final DoubleMetaphoneResult result, int index) {
720         //-- only keep if first & before vowel or between 2 vowels --//
721         if ((index == 0 || isVowel(charAt(value, index - 1))) &&
722             isVowel(charAt(value, index + 1))) {
723             result.append('H');
724             index += 2;
725             //-- also takes car of "HH" --//
726         } else {
727             index++;
728         }
729         return index;
730     }
731 
732     /**
733      * Handles 'J' cases.
734      */
735     private int handleJ(final String value, final DoubleMetaphoneResult result, int index,
736                         final boolean slavoGermanic) {
737         if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) {
738                 //-- obvious Spanish, "Jose", "San Jacinto" --//
739                 if (index == 0 && charAt(value, index + 4) == ' ' ||
740                      value.length() == 4 || contains(value, 0, 4, "SAN ")) {
741                     result.append('H');
742                 } else {
743                     result.append('J', 'H');
744                 }
745                 index++;
746             } else {
747                 if (index == 0 && !contains(value, index, 4, "JOSE")) {
748                     result.append('J', 'A');
749                 } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic &&
750                            (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) {
751                     result.append('J', 'H');
752                 } else if (index == value.length() - 1) {
753                     result.append('J', ' ');
754                 } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) &&
755                            !contains(value, index - 1, 1, "S", "K", "L")) {
756                     result.append('J');
757                 }
758 
759                 if (charAt(value, index + 1) == 'J') {
760                     index += 2;
761                 } else {
762                     index++;
763                 }
764             }
765         return index;
766     }
767 
768     /**
769      * Handles 'L' cases.
770      */
771     private int handleL(final String value, final DoubleMetaphoneResult result, int index) {
772         if (charAt(value, index + 1) == 'L') {
773             if (conditionL0(value, index)) {
774                 result.appendPrimary('L');
775             } else {
776                 result.append('L');
777             }
778             index += 2;
779         } else {
780             index++;
781             result.append('L');
782         }
783         return index;
784     }
785 
786     /**
787      * Handles 'P' cases.
788      */
789     private int handleP(final String value, final DoubleMetaphoneResult result, int index) {
790         if (charAt(value, index + 1) == 'H') {
791             result.append('F');
792             index += 2;
793         } else {
794             result.append('P');
795             index = contains(value, index + 1, 1, "P", "B") ? index + 2 : index + 1;
796         }
797         return index;
798     }
799 
800     /**
801      * Handles 'R' cases.
802      */
803     private int handleR(final String value, final DoubleMetaphoneResult result, final int index,
804                         final boolean slavoGermanic) {
805         if (index == value.length() - 1 && !slavoGermanic &&
806             contains(value, index - 2, 2, "IE") &&
807             !contains(value, index - 4, 2, "ME", "MA")) {
808             result.appendAlternate('R');
809         } else {
810             result.append('R');
811         }
812         return charAt(value, index + 1) == 'R' ? index + 2 : index + 1;
813     }
814 
815     //-- BEGIN CONDITIONS --//
816 
817     /**
818      * Handles 'S' cases.
819      */
820     private int handleS(final String value, final DoubleMetaphoneResult result, int index,
821                         final boolean slavoGermanic) {
822         if (contains(value, index - 1, 3, "ISL", "YSL")) {
823             //-- special cases "island", "isle", "carlisle", "carlysle" --//
824             index++;
825         } else if (index == 0 && contains(value, index, 5, "SUGAR")) {
826             //-- special case "sugar-" --//
827             result.append('X', 'S');
828             index++;
829         } else if (contains(value, index, 2, "SH")) {
830             if (contains(value, index + 1, 4, "HEIM", "HOEK", "HOLM", "HOLZ")) {
831                 //-- germanic --//
832                 result.append('S');
833             } else {
834                 result.append('X');
835             }
836             index += 2;
837         } else if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) {
838             //-- Italian and Armenian --//
839             if (slavoGermanic) {
840                 result.append('S');
841             } else {
842                 result.append('S', 'X');
843             }
844             index += 3;
845         } else if (index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W") ||
846                    contains(value, index + 1, 1, "Z")) {
847             //-- german & anglicisations, e.g. "smith" match "schmidt" //
848             // "snider" match "schneider" --//
849             //-- also, -sz- in slavic language although in hungarian it //
850             //   is pronounced "s" --//
851             result.append('S', 'X');
852             index = contains(value, index + 1, 1, "Z") ? index + 2 : index + 1;
853         } else if (contains(value, index, 2, "SC")) {
854             index = handleSC(value, result, index);
855         } else {
856             if (index == value.length() - 1 && contains(value, index - 2, 2, "AI", "OI")) {
857                 //-- french e.g. "resnais", "artois" --//
858                 result.appendAlternate('S');
859             } else {
860                 result.append('S');
861             }
862             index = contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1;
863         }
864         return index;
865     }
866 
867     /**
868      * Handles 'SC' cases.
869      */
870     private int handleSC(final String value, final DoubleMetaphoneResult result, final int index) {
871         if (charAt(value, index + 2) == 'H') {
872             //-- Schlesinger's rule --//
873             if (contains(value, index + 3, 2, "OO", "ER", "EN", "UY", "ED", "EM")) {
874                 //-- Dutch origin, e.g. "school", "schooner" --//
875                 if (contains(value, index + 3, 2, "ER", "EN")) {
876                     //-- "schermerhorn", "schenker" --//
877                     result.append("X", "SK");
878                 } else {
879                     result.append("SK");
880                 }
881             } else if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') {
882                 result.append('X', 'S');
883             } else {
884                 result.append('X');
885             }
886         } else if (contains(value, index + 2, 1, "I", "E", "Y")) {
887             result.append('S');
888         } else {
889             result.append("SK");
890         }
891         return index + 3;
892     }
893 
894     /**
895      * Handles 'T' cases.
896      */
897     private int handleT(final String value, final DoubleMetaphoneResult result, int index) {
898         if (contains(value, index, 4, "TION") || contains(value, index, 3, "TIA", "TCH")) {
899             result.append('X');
900             index += 3;
901         } else if (contains(value, index, 2, "TH") || contains(value, index, 3, "TTH")) {
902             if (contains(value, index + 2, 2, "OM", "AM") ||
903                 //-- special case "thomas", "thames" or germanic --//
904                 contains(value, 0, 4, "VAN ", "VON ") ||
905                 contains(value, 0, 3, "SCH")) {
906                 result.append('T');
907             } else {
908                 result.append('0', 'T');
909             }
910             index += 2;
911         } else {
912             result.append('T');
913             index = contains(value, index + 1, 1, "T", "D") ? index + 2 : index + 1;
914         }
915         return index;
916     }
917 
918     /**
919      * Handles 'W' cases.
920      */
921     private int handleW(final String value, final DoubleMetaphoneResult result, int index) {
922         if (contains(value, index, 2, "WR")) {
923             //-- can also be in middle of word --//
924             result.append('R');
925             index += 2;
926         } else if (index == 0 && (isVowel(charAt(value, index + 1)) ||
927                            contains(value, index, 2, "WH"))) {
928             if (isVowel(charAt(value, index + 1))) {
929                 //-- Wasserman should match Vasserman --//
930                 result.append('A', 'F');
931             } else {
932                 //-- need Uomo to match Womo --//
933                 result.append('A');
934             }
935             index++;
936         } else if (index == value.length() - 1 && isVowel(charAt(value, index - 1)) ||
937                    contains(value, index - 1, 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") ||
938                    contains(value, 0, 3, "SCH")) {
939             //-- Arnow should match Arnoff --//
940             result.appendAlternate('F');
941             index++;
942         } else if (contains(value, index, 4, "WICZ", "WITZ")) {
943             //-- Polish e.g. "filipowicz" --//
944             result.append("TS", "FX");
945             index += 4;
946         } else {
947             index++;
948         }
949         return index;
950     }
951 
952     /**
953      * Handles 'X' cases.
954      */
955     private int handleX(final String value, final DoubleMetaphoneResult result, int index) {
956         if (index == 0) {
957             result.append('S');
958             index++;
959         } else {
960             if (!(index == value.length() - 1 &&
961                   (contains(value, index - 3, 3, "IAU", "EAU") ||
962                    contains(value, index - 2, 2, "AU", "OU")))) {
963                 //-- French e.g. breaux --//
964                 result.append("KS");
965             }
966             index = contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1;
967         }
968         return index;
969     }
970 
971     //-- BEGIN HELPER FUNCTIONS --//
972 
973     /**
974      * Handles 'Z' cases.
975      */
976     private int handleZ(final String value, final DoubleMetaphoneResult result, int index,
977                         final boolean slavoGermanic) {
978         if (charAt(value, index + 1) == 'H') {
979             //-- Chinese pinyin e.g. "zhao" or Angelina "Zhang" --//
980             result.append('J');
981             index += 2;
982         } else {
983             if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") ||
984                 slavoGermanic && index > 0 && charAt(value, index - 1) != 'T') {
985                 result.append("S", "TS");
986             } else {
987                 result.append('S');
988             }
989             index = charAt(value, index + 1) == 'Z' ? index + 2 : index + 1;
990         }
991         return index;
992     }
993 
994     /**
995      * Check if the Double Metaphone values of two {@code String} values
996      * are equal.
997      *
998      * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
999      * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
1000      * @return {@code true} if the encoded {@code String}s are equal;
1001      *          {@code false} otherwise.
1002      * @see #isDoubleMetaphoneEqual(String,String,boolean)
1003      */
1004     public boolean isDoubleMetaphoneEqual(final String value1, final String value2) {
1005         return isDoubleMetaphoneEqual(value1, value2, false);
1006     }
1007 
1008     /**
1009      * Check if the Double Metaphone values of two {@code String} values
1010      * are equal, optionally using the alternate value.
1011      *
1012      * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
1013      * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
1014      * @param alternate use the alternate value if {@code true}.
1015      * @return {@code true} if the encoded {@code String}s are equal;
1016      *          {@code false} otherwise.
1017      */
1018     public boolean isDoubleMetaphoneEqual(final String value1, final String value2, final boolean alternate) {
1019         return StringUtils.equals(doubleMetaphone(value1, alternate), doubleMetaphone(value2, alternate));
1020     }
1021 
1022     /**
1023      * Determines whether or not the value starts with a silent letter.  It will
1024      * return {@code true} if the value starts with any of 'GN', 'KN',
1025      * 'PN', 'WR' or 'PS'.
1026      */
1027     private boolean isSilentStart(final String value) {
1028         boolean result = false;
1029         for (final String element : SILENT_START) {
1030             if (value.startsWith(element)) {
1031                 result = true;
1032                 break;
1033             }
1034         }
1035         return result;
1036     }
1037 
1038     /**
1039      * Determines whether or not a value is of slavo-germanic origin. A value is
1040      * of slavo-germanic origin if it contains any of 'W', 'K', 'CZ', or 'WITZ'.
1041      */
1042     private boolean isSlavoGermanic(final String value) {
1043         return value.indexOf('W') > -1 || value.indexOf('K') > -1 ||
1044                 value.contains("CZ") || value.contains("WITZ");
1045     }
1046 
1047     /**
1048      * Determines whether or not a character is a vowel or not
1049      */
1050     private boolean isVowel(final char ch) {
1051         return VOWELS.indexOf(ch) != -1;
1052     }
1053 
1054     //-- BEGIN INNER CLASSES --//
1055 
1056     /**
1057      * Sets the maxCodeLen.
1058      * @param maxCodeLen The maxCodeLen to set
1059      */
1060     public void setMaxCodeLen(final int maxCodeLen) {
1061         this.maxCodeLen = maxCodeLen;
1062     }
1063 }