001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.language;
019
020import org.apache.commons.codec.EncoderException;
021import org.apache.commons.codec.StringEncoder;
022import org.apache.commons.codec.binary.StringUtils;
023
024/**
 * Encodes a string into a double metaphone value. This implementation is based on the algorithm by <CITE>Lawrence
 * Philips</CITE>.
027 * <p>
028 * This class is conditionally thread-safe. The instance field for the maximum code length is mutable
029 * {@link #setMaxCodeLen(int)} but is not volatile, and accesses are not synchronized. If an instance of the class is
030 * shared between threads, the caller needs to ensure that suitable synchronization is used to ensure safe publication
031 * of the value between threads, and must not invoke {@link #setMaxCodeLen(int)} after initial setup.
032 * </p>
033 *
034 * @see <a href="https://drdobbs.com/the-double-metaphone-search-algorithm/184401251?pgno=2">Dr. Dobbs Original Article</a>
035 * @see <a href="https://en.wikipedia.org/wiki/Metaphone">Wikipedia Metaphone</a>
036 */
037public class DoubleMetaphone implements StringEncoder {
038
039    /**
040     * Stores results, since there is the optional alternate encoding.
041     */
042    public class DoubleMetaphoneResult {
043
044        private final StringBuilder primary = new StringBuilder(getMaxCodeLen());
045        private final StringBuilder alternate = new StringBuilder(getMaxCodeLen());
046        private final int maxLength;
047
048        /**
049         * Constructs a new instance.
050         *
051         * @param maxLength The maximum length.
052         */
053        public DoubleMetaphoneResult(final int maxLength) {
054            this.maxLength = maxLength;
055        }
056
057        /**
058         * Appends the given value as primary and alternative.
059         *
060         * @param value The value to append.
061         */
062        public void append(final char value) {
063            appendPrimary(value);
064            appendAlternate(value);
065        }
066
067        /**
068         * Appends the given primary and alternative values.
069         *
070         * @param primary   The primary value.
071         * @param alternate The alternate value.
072         */
073        public void append(final char primary, final char alternate) {
074            appendPrimary(primary);
075            appendAlternate(alternate);
076        }
077
078        /**
079         * Appends the given value as primary and alternative.
080         *
081         * @param value The value to append.
082         */
083        public void append(final String value) {
084            appendPrimary(value);
085            appendAlternate(value);
086        }
087
088        /**
089         * Appends the given primary and alternative values.
090         *
091         * @param primary   The primary value.
092         * @param alternate The alternate value.
093         */
094        public void append(final String primary, final String alternate) {
095            appendPrimary(primary);
096            appendAlternate(alternate);
097        }
098
099        /**
100         * Appends the given value as alternative.
101         *
102         * @param value The value to append.
103         */
104        public void appendAlternate(final char value) {
105            if (this.alternate.length() < this.maxLength) {
106                this.alternate.append(value);
107            }
108        }
109
110        /**
111         * Appends the given value as alternative.
112         *
113         * @param value The value to append.
114         */
115        public void appendAlternate(final String value) {
116            final int addChars = this.maxLength - this.alternate.length();
117            if (value.length() <= addChars) {
118                this.alternate.append(value);
119            } else {
120                this.alternate.append(value, 0, addChars);
121            }
122        }
123
124        /**
125         * Appends the given value as primary.
126         *
127         * @param value The value to append.
128         */
129        public void appendPrimary(final char value) {
130            if (this.primary.length() < this.maxLength) {
131                this.primary.append(value);
132            }
133        }
134
135        /**
136         * Appends the given value as primary.
137         *
138         * @param value The value to append.
139         */
140        public void appendPrimary(final String value) {
141            final int addChars = this.maxLength - this.primary.length();
142            if (value.length() <= addChars) {
143                this.primary.append(value);
144            } else {
145                this.primary.append(value, 0, addChars);
146            }
147        }
148
149        /**
150         * Gets the alternate string.
151         *
152         * @return the alternate string.
153         */
154        public String getAlternate() {
155            return this.alternate.toString();
156        }
157
158        /**
159         * Gets the primary string.
160         *
161         * @return the primary string.
162         */
163        public String getPrimary() {
164            return this.primary.toString();
165        }
166
167        /**
168         * Tests whether this result is complete.
169         *
170         * @return whether this result is complete.
171         */
172        public boolean isComplete() {
173            return this.primary.length() >= this.maxLength && this.alternate.length() >= this.maxLength;
174        }
175    }
176
    /**
     * "Vowels" to test. Note that 'Y' is part of this set.
     */
    private static final String VOWELS = "AEIOUY";

    /**
     * Prefixes when present which are not pronounced.
     */
    private static final String[] SILENT_START = { "GN", "KN", "PN", "WR", "PS" };

    /**
     * Single-character criteria used by the second 'CH' condition: tested against the character two positions
     * after the 'C'. The last entry is a space.
     */
    private static final String[] L_R_N_M_B_H_F_V_W_SPACE = { "L", "R", "N", "M", "B", "H", "F", "V", "W", " " };

    /**
     * Two-character criteria used by the 'G' handler: tested against the two characters following a 'G' that is
     * the first character of the value.
     */
    private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER = { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" };

    /**
     * Single-character criteria used by the 'J' handler: tested against the character following the 'J'.
     */
    private static final String[] L_T_K_S_N_M_B_Z = { "L", "T", "K", "S", "N", "M", "B", "Z" };
190
191    /**
192     * Tests whether {@code value} contains any of the {@code criteria} starting at index {@code start} and matching up to length {@code length}.
193     *
194     * @param value    The value to test.
195     * @param start    Where in {@code value} to start testing.
196     * @param length   How many to test.
197     * @param criteria The search criteria.
198     * @return Whether there was a match.
199     */
200    protected static boolean contains(final String value, final int start, final int length, final String... criteria) {
201        boolean result = false;
202        if (start >= 0 && start + length <= value.length()) {
203            final String target = value.substring(start, start + length);
204            for (final String element : criteria) {
205                if (target.equals(element)) {
206                    result = true;
207                    break;
208                }
209            }
210        }
211        return result;
212    }
213
    /**
     * Maximum length of an encoding; the default is 4. Read through {@link #getMaxCodeLen()} each time a value
     * is encoded.
     */
    private int maxCodeLen = 4;
218
    /**
     * Constructs a new instance. The maximum code length keeps its field default of 4.
     */
    public DoubleMetaphone() {
        // empty
    }
225
226    /**
227     * Gets the character at index {@code index} if available, or {@link Character#MIN_VALUE} if out of bounds.
228     *
229     * @param value The String to query.
230     * @param index A string index.
231     * @return The character at the index or {@link Character#MIN_VALUE} if out of bounds.
232     */
233    protected char charAt(final String value, final int index) {
234        if (index < 0 || index >= value.length()) {
235            return Character.MIN_VALUE;
236        }
237        return value.charAt(index);
238    }
239
240    /**
241     * Cleans the input.
242     */
243    private String cleanInput(String input) {
244        if (input == null) {
245            return null;
246        }
247        input = input.trim();
248        if (input.isEmpty()) {
249            return null;
250        }
251        return input.toUpperCase(java.util.Locale.ENGLISH);
252    }
253
254    /**
255     * Complex condition 0 for 'C'.
256     */
257    private boolean conditionC0(final String value, final int index) {
258        if (contains(value, index, 4, "CHIA")) {
259            return true;
260        }
261        if (index <= 1) {
262            return false;
263        }
264        if (isVowel(charAt(value, index - 2))) {
265            return false;
266        }
267        if (!contains(value, index - 1, 3, "ACH")) {
268            return false;
269        }
270        final char c = charAt(value, index + 2);
271        return c != 'I' && c != 'E' ||
272                contains(value, index - 2, 6, "BACHER", "MACHER");
273    }
274
275    /**
276     * Complex condition 0 for 'CH'.
277     */
278    private boolean conditionCH0(final String value, final int index) {
279        if (index != 0) {
280            return false;
281        }
282        if (!contains(value, index + 1, 5, "HARAC", "HARIS") &&
283                   !contains(value, index + 1, 3, "HOR", "HYM", "HIA", "HEM")) {
284            return false;
285        }
286        return !contains(value, 0, 5, "CHORE");
287    }
288
289    /**
290     * Complex condition 1 for 'CH'.
291     */
292    private boolean conditionCH1(final String value, final int index) {
293        return contains(value, 0, 4, "VAN ", "VON ") || contains(value, 0, 3, "SCH") ||
294                contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") ||
295                contains(value, index + 2, 1, "T", "S") ||
296                (contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) &&
297                 (contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.length() - 1);
298    }
299
300    /**
301     * Complex condition 0 for 'L'.
302     */
303    private boolean conditionL0(final String value, final int index) {
304        if (index == value.length() - 3 &&
305            contains(value, index - 1, 4, "ILLO", "ILLA", "ALLE")) {
306            return true;
307        }
308        return (contains(value, value.length() - 2, 2, "AS", "OS") ||
309                contains(value, value.length() - 1, 1, "A", "O")) &&
310                contains(value, index - 1, 4, "ALLE");
311    }
312
    //-- BEGIN HANDLERS --// (stale marker: the next member is conditionM0; the handleXxx methods start further down)
314
315    /**
316     * Complex condition 0 for 'M'.
317     */
318    private boolean conditionM0(final String value, final int index) {
319        if (charAt(value, index + 1) == 'M') {
320            return true;
321        }
322        return contains(value, index - 1, 3, "UMB") &&
323               (index + 1 == value.length() - 1 || contains(value, index + 2, 2, "ER"));
324    }
325
326    /**
327     * Encode a value with Double Metaphone.
328     *
329     * @param value String to encode
330     * @return an encoded string
331     */
332    public String doubleMetaphone(final String value) {
333        return doubleMetaphone(value, false);
334    }
335
336    /**
337     * Encode a value with Double Metaphone, optionally using the alternate encoding.
338     *
339     * @param value String to encode
340     * @param alternate use alternate encode
341     * @return an encoded string
342     */
343    public String doubleMetaphone(String value, final boolean alternate) {
344        value = cleanInput(value);
345        if (value == null) {
346            return null;
347        }
348
349        final boolean slavoGermanic = isSlavoGermanic(value);
350        int index = isSilentStart(value) ? 1 : 0;
351
352        final DoubleMetaphoneResult result = new DoubleMetaphoneResult(getMaxCodeLen());
353
354        while (!result.isComplete() && index <= value.length() - 1) {
355            switch (value.charAt(index)) {
356            case 'A':
357            case 'E':
358            case 'I':
359            case 'O':
360            case 'U':
361            case 'Y':
362                index = handleAEIOUY(result, index);
363                break;
364            case 'B':
365                result.append('P');
366                index = charAt(value, index + 1) == 'B' ? index + 2 : index + 1;
367                break;
368            case '\u00C7':
369                // A C with a Cedilla
370                result.append('S');
371                index++;
372                break;
373            case 'C':
374                index = handleC(value, result, index);
375                break;
376            case 'D':
377                index = handleD(value, result, index);
378                break;
379            case 'F':
380                result.append('F');
381                index = charAt(value, index + 1) == 'F' ? index + 2 : index + 1;
382                break;
383            case 'G':
384                index = handleG(value, result, index, slavoGermanic);
385                break;
386            case 'H':
387                index = handleH(value, result, index);
388                break;
389            case 'J':
390                index = handleJ(value, result, index, slavoGermanic);
391                break;
392            case 'K':
393                result.append('K');
394                index = charAt(value, index + 1) == 'K' ? index + 2 : index + 1;
395                break;
396            case 'L':
397                index = handleL(value, result, index);
398                break;
399            case 'M':
400                result.append('M');
401                index = conditionM0(value, index) ? index + 2 : index + 1;
402                break;
403            case 'N':
404                result.append('N');
405                index = charAt(value, index + 1) == 'N' ? index + 2 : index + 1;
406                break;
407            case '\u00D1':
408                // N with a tilde (spanish ene)
409                result.append('N');
410                index++;
411                break;
412            case 'P':
413                index = handleP(value, result, index);
414                break;
415            case 'Q':
416                result.append('K');
417                index = charAt(value, index + 1) == 'Q' ? index + 2 : index + 1;
418                break;
419            case 'R':
420                index = handleR(value, result, index, slavoGermanic);
421                break;
422            case 'S':
423                index = handleS(value, result, index, slavoGermanic);
424                break;
425            case 'T':
426                index = handleT(value, result, index);
427                break;
428            case 'V':
429                result.append('F');
430                index = charAt(value, index + 1) == 'V' ? index + 2 : index + 1;
431                break;
432            case 'W':
433                index = handleW(value, result, index);
434                break;
435            case 'X':
436                index = handleX(value, result, index);
437                break;
438            case 'Z':
439                index = handleZ(value, result, index, slavoGermanic);
440                break;
441            default:
442                index++;
443                break;
444            }
445        }
446
447        return alternate ? result.getAlternate() : result.getPrimary();
448    }
449
450    /**
451     * Encode the value using DoubleMetaphone.  It will only work if
452     * {@code obj} is a {@code String} (like {@code Metaphone}).
453     *
454     * @param obj Object to encode (should be of type String)
455     * @return An encoded Object (will be of type String)
456     * @throws EncoderException encode parameter is not of type String
457     */
458    @Override
459    public Object encode(final Object obj) throws EncoderException {
460        if (!(obj instanceof String)) {
461            throw new EncoderException("DoubleMetaphone encode parameter is not of type String");
462        }
463        return doubleMetaphone((String) obj);
464    }
465
466    /**
467     * Encode the value using DoubleMetaphone.
468     *
469     * @param value String to encode
470     * @return An encoded String
471     */
472    @Override
473    public String encode(final String value) {
474        return doubleMetaphone(value);
475    }
476
477    /**
478     * Returns the maxCodeLen.
479     * @return int
480     */
481    public int getMaxCodeLen() {
482        return this.maxCodeLen;
483    }
484
485    /**
486     * Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases.
487     */
488    private int handleAEIOUY(final DoubleMetaphoneResult result, final int index) {
489        if (index == 0) {
490            result.append('A');
491        }
492        return index + 1;
493    }
494
495    /**
496     * Handles 'C' cases.
497     */
498    private int handleC(final String value, final DoubleMetaphoneResult result, int index) {
499        if (conditionC0(value, index)) {  // very confusing, moved out
500            result.append('K');
501            index += 2;
502        } else if (index == 0 && contains(value, index, 6, "CAESAR")) {
503            result.append('S');
504            index += 2;
505        } else if (contains(value, index, 2, "CH")) {
506            index = handleCH(value, result, index);
507        } else if (contains(value, index, 2, "CZ") &&
508                   !contains(value, index - 2, 4, "WICZ")) {
509            //-- "Czerny" --//
510            result.append('S', 'X');
511            index += 2;
512        } else if (contains(value, index + 1, 3, "CIA")) {
513            //-- "focaccia" --//
514            result.append('X');
515            index += 3;
516        } else if (contains(value, index, 2, "CC") &&
517                   !(index == 1 && charAt(value, 0) == 'M')) {
518            //-- double "cc" but not "McClelland" --//
519            return handleCC(value, result, index);
520        } else if (contains(value, index, 2, "CK", "CG", "CQ")) {
521            result.append('K');
522            index += 2;
523        } else if (contains(value, index, 2, "CI", "CE", "CY")) {
524            //-- Italian vs. English --//
525            if (contains(value, index, 3, "CIO", "CIE", "CIA")) {
526                result.append('S', 'X');
527            } else {
528                result.append('S');
529            }
530            index += 2;
531        } else {
532            result.append('K');
533            if (contains(value, index + 1, 2, " C", " Q", " G")) {
534                //-- Mac Caffrey, Mac Gregor --//
535                index += 3;
536            } else if (contains(value, index + 1, 1, "C", "K", "Q") &&
537                       !contains(value, index + 1, 2, "CE", "CI")) {
538                index += 2;
539            } else {
540                index++;
541            }
542        }
543
544        return index;
545    }
546
547    /**
548     * Handles 'CC' cases.
549     */
550    private int handleCC(final String value, final DoubleMetaphoneResult result, int index) {
551        if (contains(value, index + 2, 1, "I", "E", "H") &&
552            !contains(value, index + 2, 2, "HU")) {
553            //-- "bellocchio" but not "bacchus" --//
554            if (index == 1 && charAt(value, index - 1) == 'A' ||
555                contains(value, index - 1, 5, "UCCEE", "UCCES")) {
556                //-- "accident", "accede", "succeed" --//
557                result.append("KS");
558            } else {
559                //-- "bacci", "bertucci", other Italian --//
560                result.append('X');
561            }
562            index += 3;
563        } else {    // Pierce's rule
564            result.append('K');
565            index += 2;
566        }
567
568        return index;
569    }
570
571    /**
572     * Handles 'CH' cases.
573     */
574    private int handleCH(final String value, final DoubleMetaphoneResult result, final int index) {
575        if (index > 0 && contains(value, index, 4, "CHAE")) {   // Michael
576            result.append('K', 'X');
577            return index + 2;
578        }
579        if (conditionCH0(value, index)) {
580            //-- Greek roots ("chemistry", "chorus", etc.) --//
581            result.append('K');
582            return index + 2;
583        }
584        if (conditionCH1(value, index)) {
585            //-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --//
586            result.append('K');
587            return index + 2;
588        }
589        if (index > 0) {
590            if (contains(value, 0, 2, "MC")) {
591                result.append('K');
592            } else {
593                result.append('X', 'K');
594            }
595        } else {
596            result.append('X');
597        }
598        return index + 2;
599    }
600
601    /**
602     * Handles 'D' cases.
603     */
604    private int handleD(final String value, final DoubleMetaphoneResult result, int index) {
605        if (contains(value, index, 2, "DG")) {
606            //-- "Edge" --//
607            if (contains(value, index + 2, 1, "I", "E", "Y")) {
608                result.append('J');
609                index += 3;
610                //-- "Edgar" --//
611            } else {
612                result.append("TK");
613                index += 2;
614            }
615        } else if (contains(value, index, 2, "DT", "DD")) {
616            result.append('T');
617            index += 2;
618        } else {
619            result.append('T');
620            index++;
621        }
622        return index;
623    }
624
625    /**
626     * Handles 'G' cases.
627     */
628    private int handleG(final String value, final DoubleMetaphoneResult result, int index,
629                        final boolean slavoGermanic) {
630        if (charAt(value, index + 1) == 'H') {
631            index = handleGH(value, result, index);
632        } else if (charAt(value, index + 1) == 'N') {
633            if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic) {
634                result.append("KN", "N");
635            } else if (!contains(value, index + 2, 2, "EY") &&
636                       charAt(value, index + 1) != 'Y' && !slavoGermanic) {
637                result.append("N", "KN");
638            } else {
639                result.append("KN");
640            }
641            index += 2;
642        } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) {
643            result.append("KL", "L");
644            index += 2;
645        } else if (index == 0 &&
646                   (charAt(value, index + 1) == 'Y' ||
647                    contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) {
648            //-- -ges-, -gep-, -gel-, -gie- at beginning --//
649            result.append('K', 'J');
650            index += 2;
651        } else if ((contains(value, index + 1, 2, "ER") ||
652                    charAt(value, index + 1) == 'Y') &&
653                   !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") &&
654                   !contains(value, index - 1, 1, "E", "I") &&
655                   !contains(value, index - 1, 3, "RGY", "OGY")) {
656            //-- -ger-, -gy- --//
657            result.append('K', 'J');
658            index += 2;
659        } else if (contains(value, index + 1, 1, "E", "I", "Y") ||
660                   contains(value, index - 1, 4, "AGGI", "OGGI")) {
661            //-- Italian "biaggi" --//
662            if (contains(value, 0, 4, "VAN ", "VON ") ||
663                contains(value, 0, 3, "SCH") ||
664                contains(value, index + 1, 2, "ET")) {
665                //-- obvious germanic --//
666                result.append('K');
667            } else if (contains(value, index + 1, 3, "IER")) {
668                result.append('J');
669            } else {
670                result.append('J', 'K');
671            }
672            index += 2;
673        } else {
674            if (charAt(value, index + 1) == 'G') {
675                index += 2;
676            } else {
677                index++;
678            }
679            result.append('K');
680        }
681        return index;
682    }
683
684    /**
685     * Handles 'GH' cases.
686     */
687    private int handleGH(final String value, final DoubleMetaphoneResult result, int index) {
688        if (index > 0 && !isVowel(charAt(value, index - 1))) {
689            result.append('K');
690            index += 2;
691        } else if (index == 0) {
692            if (charAt(value, index + 2) == 'I') {
693                result.append('J');
694            } else {
695                result.append('K');
696            }
697            index += 2;
698        } else if (index > 1 && contains(value, index - 2, 1, "B", "H", "D") ||
699                   index > 2 && contains(value, index - 3, 1, "B", "H", "D") ||
700                   index > 3 && contains(value, index - 4, 1, "B", "H")) {
701            //-- Parker's rule (with some further refinements) - "hugh"
702            index += 2;
703        } else {
704            if (index > 2 && charAt(value, index - 1) == 'U' &&
705                contains(value, index - 3, 1, "C", "G", "L", "R", "T")) {
706                //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough"
707                result.append('F');
708            } else if (index > 0 && charAt(value, index - 1) != 'I') {
709                result.append('K');
710            }
711            index += 2;
712        }
713        return index;
714    }
715
716    /**
717     * Handles 'H' cases.
718     */
719    private int handleH(final String value, final DoubleMetaphoneResult result, int index) {
720        //-- only keep if first & before vowel or between 2 vowels --//
721        if ((index == 0 || isVowel(charAt(value, index - 1))) &&
722            isVowel(charAt(value, index + 1))) {
723            result.append('H');
724            index += 2;
725            //-- also takes car of "HH" --//
726        } else {
727            index++;
728        }
729        return index;
730    }
731
732    /**
733     * Handles 'J' cases.
734     */
735    private int handleJ(final String value, final DoubleMetaphoneResult result, int index,
736                        final boolean slavoGermanic) {
737        if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) {
738                //-- obvious Spanish, "Jose", "San Jacinto" --//
739                if (index == 0 && charAt(value, index + 4) == ' ' ||
740                     value.length() == 4 || contains(value, 0, 4, "SAN ")) {
741                    result.append('H');
742                } else {
743                    result.append('J', 'H');
744                }
745                index++;
746            } else {
747                if (index == 0 && !contains(value, index, 4, "JOSE")) {
748                    result.append('J', 'A');
749                } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic &&
750                           (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) {
751                    result.append('J', 'H');
752                } else if (index == value.length() - 1) {
753                    result.append('J', ' ');
754                } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) &&
755                           !contains(value, index - 1, 1, "S", "K", "L")) {
756                    result.append('J');
757                }
758
759                if (charAt(value, index + 1) == 'J') {
760                    index += 2;
761                } else {
762                    index++;
763                }
764            }
765        return index;
766    }
767
768    /**
769     * Handles 'L' cases.
770     */
771    private int handleL(final String value, final DoubleMetaphoneResult result, int index) {
772        if (charAt(value, index + 1) == 'L') {
773            if (conditionL0(value, index)) {
774                result.appendPrimary('L');
775            } else {
776                result.append('L');
777            }
778            index += 2;
779        } else {
780            index++;
781            result.append('L');
782        }
783        return index;
784    }
785
786    /**
787     * Handles 'P' cases.
788     */
789    private int handleP(final String value, final DoubleMetaphoneResult result, int index) {
790        if (charAt(value, index + 1) == 'H') {
791            result.append('F');
792            index += 2;
793        } else {
794            result.append('P');
795            index = contains(value, index + 1, 1, "P", "B") ? index + 2 : index + 1;
796        }
797        return index;
798    }
799
800    /**
801     * Handles 'R' cases.
802     */
803    private int handleR(final String value, final DoubleMetaphoneResult result, final int index,
804                        final boolean slavoGermanic) {
805        if (index == value.length() - 1 && !slavoGermanic &&
806            contains(value, index - 2, 2, "IE") &&
807            !contains(value, index - 4, 2, "ME", "MA")) {
808            result.appendAlternate('R');
809        } else {
810            result.append('R');
811        }
812        return charAt(value, index + 1) == 'R' ? index + 2 : index + 1;
813    }
814
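    //-- BEGIN CONDITIONS --// (stale marker: the next member is handleS, a handler; condition methods such as conditionC0 are defined earlier)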
816
817    /**
818     * Handles 'S' cases.
819     */
820    private int handleS(final String value, final DoubleMetaphoneResult result, int index,
821                        final boolean slavoGermanic) {
822        if (contains(value, index - 1, 3, "ISL", "YSL")) {
823            //-- special cases "island", "isle", "carlisle", "carlysle" --//
824            index++;
825        } else if (index == 0 && contains(value, index, 5, "SUGAR")) {
826            //-- special case "sugar-" --//
827            result.append('X', 'S');
828            index++;
829        } else if (contains(value, index, 2, "SH")) {
830            if (contains(value, index + 1, 4, "HEIM", "HOEK", "HOLM", "HOLZ")) {
831                //-- germanic --//
832                result.append('S');
833            } else {
834                result.append('X');
835            }
836            index += 2;
837        } else if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) {
838            //-- Italian and Armenian --//
839            if (slavoGermanic) {
840                result.append('S');
841            } else {
842                result.append('S', 'X');
843            }
844            index += 3;
845        } else if (index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W") ||
846                   contains(value, index + 1, 1, "Z")) {
847            //-- german & anglicisations, e.g. "smith" match "schmidt" //
848            // "snider" match "schneider" --//
849            //-- also, -sz- in slavic language although in hungarian it //
850            //   is pronounced "s" --//
851            result.append('S', 'X');
852            index = contains(value, index + 1, 1, "Z") ? index + 2 : index + 1;
853        } else if (contains(value, index, 2, "SC")) {
854            index = handleSC(value, result, index);
855        } else {
856            if (index == value.length() - 1 && contains(value, index - 2, 2, "AI", "OI")) {
857                //-- french e.g. "resnais", "artois" --//
858                result.appendAlternate('S');
859            } else {
860                result.append('S');
861            }
862            index = contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1;
863        }
864        return index;
865    }
866
867    /**
868     * Handles 'SC' cases.
869     */
870    private int handleSC(final String value, final DoubleMetaphoneResult result, final int index) {
871        if (charAt(value, index + 2) == 'H') {
872            //-- Schlesinger's rule --//
873            if (contains(value, index + 3, 2, "OO", "ER", "EN", "UY", "ED", "EM")) {
874                //-- Dutch origin, e.g. "school", "schooner" --//
875                if (contains(value, index + 3, 2, "ER", "EN")) {
876                    //-- "schermerhorn", "schenker" --//
877                    result.append("X", "SK");
878                } else {
879                    result.append("SK");
880                }
881            } else if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') {
882                result.append('X', 'S');
883            } else {
884                result.append('X');
885            }
886        } else if (contains(value, index + 2, 1, "I", "E", "Y")) {
887            result.append('S');
888        } else {
889            result.append("SK");
890        }
891        return index + 3;
892    }
893
894    /**
895     * Handles 'T' cases.
896     */
897    private int handleT(final String value, final DoubleMetaphoneResult result, int index) {
898        if (contains(value, index, 4, "TION") || contains(value, index, 3, "TIA", "TCH")) {
899            result.append('X');
900            index += 3;
901        } else if (contains(value, index, 2, "TH") || contains(value, index, 3, "TTH")) {
902            if (contains(value, index + 2, 2, "OM", "AM") ||
903                //-- special case "thomas", "thames" or germanic --//
904                contains(value, 0, 4, "VAN ", "VON ") ||
905                contains(value, 0, 3, "SCH")) {
906                result.append('T');
907            } else {
908                result.append('0', 'T');
909            }
910            index += 2;
911        } else {
912            result.append('T');
913            index = contains(value, index + 1, 1, "T", "D") ? index + 2 : index + 1;
914        }
915        return index;
916    }
917
918    /**
919     * Handles 'W' cases.
920     */
921    private int handleW(final String value, final DoubleMetaphoneResult result, int index) {
922        if (contains(value, index, 2, "WR")) {
923            //-- can also be in middle of word --//
924            result.append('R');
925            index += 2;
926        } else if (index == 0 && (isVowel(charAt(value, index + 1)) ||
927                           contains(value, index, 2, "WH"))) {
928            if (isVowel(charAt(value, index + 1))) {
929                //-- Wasserman should match Vasserman --//
930                result.append('A', 'F');
931            } else {
932                //-- need Uomo to match Womo --//
933                result.append('A');
934            }
935            index++;
936        } else if (index == value.length() - 1 && isVowel(charAt(value, index - 1)) ||
937                   contains(value, index - 1, 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") ||
938                   contains(value, 0, 3, "SCH")) {
939            //-- Arnow should match Arnoff --//
940            result.appendAlternate('F');
941            index++;
942        } else if (contains(value, index, 4, "WICZ", "WITZ")) {
943            //-- Polish e.g. "filipowicz" --//
944            result.append("TS", "FX");
945            index += 4;
946        } else {
947            index++;
948        }
949        return index;
950    }
951
952    /**
953     * Handles 'X' cases.
954     */
955    private int handleX(final String value, final DoubleMetaphoneResult result, int index) {
956        if (index == 0) {
957            result.append('S');
958            index++;
959        } else {
960            if (!(index == value.length() - 1 &&
961                  (contains(value, index - 3, 3, "IAU", "EAU") ||
962                   contains(value, index - 2, 2, "AU", "OU")))) {
963                //-- French e.g. breaux --//
964                result.append("KS");
965            }
966            index = contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1;
967        }
968        return index;
969    }
970
971    //-- BEGIN HELPER FUNCTIONS --//
972
973    /**
974     * Handles 'Z' cases.
975     */
976    private int handleZ(final String value, final DoubleMetaphoneResult result, int index,
977                        final boolean slavoGermanic) {
978        if (charAt(value, index + 1) == 'H') {
979            //-- Chinese pinyin e.g. "zhao" or Angelina "Zhang" --//
980            result.append('J');
981            index += 2;
982        } else {
983            if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") ||
984                slavoGermanic && index > 0 && charAt(value, index - 1) != 'T') {
985                result.append("S", "TS");
986            } else {
987                result.append('S');
988            }
989            index = charAt(value, index + 1) == 'Z' ? index + 2 : index + 1;
990        }
991        return index;
992    }
993
994    /**
995     * Check if the Double Metaphone values of two {@code String} values
996     * are equal.
997     *
998     * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
999     * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
1000     * @return {@code true} if the encoded {@code String}s are equal;
1001     *          {@code false} otherwise.
1002     * @see #isDoubleMetaphoneEqual(String,String,boolean)
1003     */
1004    public boolean isDoubleMetaphoneEqual(final String value1, final String value2) {
1005        return isDoubleMetaphoneEqual(value1, value2, false);
1006    }
1007
1008    /**
1009     * Check if the Double Metaphone values of two {@code String} values
1010     * are equal, optionally using the alternate value.
1011     *
1012     * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
1013     * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
1014     * @param alternate use the alternate value if {@code true}.
1015     * @return {@code true} if the encoded {@code String}s are equal;
1016     *          {@code false} otherwise.
1017     */
1018    public boolean isDoubleMetaphoneEqual(final String value1, final String value2, final boolean alternate) {
1019        return StringUtils.equals(doubleMetaphone(value1, alternate), doubleMetaphone(value2, alternate));
1020    }
1021
1022    /**
1023     * Determines whether or not the value starts with a silent letter.  It will
1024     * return {@code true} if the value starts with any of 'GN', 'KN',
1025     * 'PN', 'WR' or 'PS'.
1026     */
1027    private boolean isSilentStart(final String value) {
1028        boolean result = false;
1029        for (final String element : SILENT_START) {
1030            if (value.startsWith(element)) {
1031                result = true;
1032                break;
1033            }
1034        }
1035        return result;
1036    }
1037
1038    /**
1039     * Determines whether or not a value is of slavo-germanic origin. A value is
1040     * of slavo-germanic origin if it contains any of 'W', 'K', 'CZ', or 'WITZ'.
1041     */
1042    private boolean isSlavoGermanic(final String value) {
1043        return value.indexOf('W') > -1 || value.indexOf('K') > -1 ||
1044                value.contains("CZ") || value.contains("WITZ");
1045    }
1046
1047    /**
1048     * Determines whether or not a character is a vowel or not
1049     */
1050    private boolean isVowel(final char ch) {
1051        return VOWELS.indexOf(ch) != -1;
1052    }
1053
1054    //-- BEGIN INNER CLASSES --//
1055
1056    /**
1057     * Sets the maxCodeLen.
1058     * @param maxCodeLen The maxCodeLen to set
1059     */
1060    public void setMaxCodeLen(final int maxCodeLen) {
1061        this.maxCodeLen = maxCodeLen;
1062    }
1063}