001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.validator.routines;
018
019import java.io.Serializable;
020import java.util.Arrays;
021import java.util.Locale;
022
023import org.apache.commons.validator.routines.checkdigit.ISINCheckDigit;
024
025/**
 * <b>ISIN</b> (International Securities Identification Number) validation.
027 *
028 * <p>
029 * ISIN Numbers are 12 character alphanumeric codes used to identify Securities.
030 * </p>
031 *
032 * <p>
033 * ISINs consist of two alphabetic characters,
034 * which are the ISO 3166-1 alpha-2 code for the issuing country,
035 * nine alpha-numeric characters (the National Securities Identifying Number, or NSIN, which identifies the security),
036 * and one numerical check digit.
037 * They are 12 characters in length.
038 * </p>
039 *
040 * <p>
041 * See <a href="https://en.wikipedia.org/wiki/ISIN">Wikipedia - ISIN</a>
042 * for more details.
043 * </p>
044 *
045 * @since 1.7
046 */
047public class ISINValidator implements Serializable {
048
049    private static final long serialVersionUID = -5964391439144260936L;
050
051    private static final String ISIN_REGEX = "([A-Z]{2}[A-Z0-9]{9}[0-9])";
052
053    private static final CodeValidator VALIDATOR = new CodeValidator(ISIN_REGEX, 12, ISINCheckDigit.ISIN_CHECK_DIGIT);
054
055    /** ISIN Code Validator (no countryCode check) */
056    private static final ISINValidator ISIN_VALIDATOR_FALSE = new ISINValidator(false);
057
058    /** ISIN Code Validator (with countryCode check) */
059    private static final ISINValidator ISIN_VALIDATOR_TRUE = new ISINValidator(true);
060
061    private static final String [] CCODES = Locale.getISOCountries();
062
063    /**
064     * All codes from ISO 3166-1 alpha-2 except unassigned code elements.
065     *
066     * From https://www.iso.org/obp/ui/#iso:pub:PUB500001:en as of 2024-03-23.
067     */
068    private static final String[] SPECIALS = {
069            "AA",
070            "AC",
071            "AD",
072            "AE",
073            "AF",
074            "AG",
075            "AI",
076            "AL",
077            "AM",
078            "AN",
079            "AO",
080            "AP",
081            "AQ",
082            "AR",
083            "AS",
084            "AT",
085            "AU",
086            "AW",
087            "AX",
088            "AZ",
089            "BA",
090            "BB",
091            "BD",
092            "BE",
093            "BF",
094            "BG",
095            "BH",
096            "BI",
097            "BJ",
098            "BL",
099            "BM",
100            "BN",
101            "BO",
102            "BQ",
103            "BR",
104            "BS",
105            "BT",
106            "BU",
107            "BV",
108            "BW",
109            "BX",
110            "BY",
111            "BZ",
112            "CA",
113            "CC",
114            "CD",
115            "CF",
116            "CG",
117            "CH",
118            "CI",
119            "CK",
120            "CL",
121            "CM",
122            "CN",
123            "CO",
124            "CP",
125            "CQ",
126            "CR",
127            "CS",
128            "CT",
129            "CU",
130            "CV",
131            "CW",
132            "CX",
133            "CY",
134            "CZ",
135            "DD",
136            "DE",
137            "DG",
138            "DJ",
139            "DK",
140            "DM",
141            "DO",
142            "DY",
143            "DZ",
144            "EA",
145            "EC",
146            "EE",
147            "EF",
148            "EG",
149            "EH",
150            "EM",
151            "EP",
152            "ER",
153            "ES",
154            "ET",
155            "EU",
156            "EV",
157            "EW",
158            "EZ",
159            "FI",
160            "FJ",
161            "FK",
162            "FL",
163            "FM",
164            "FO",
165            "FQ",
166            "FR",
167            "FX",
168            "GA",
169            "GB",
170            "GC",
171            "GD",
172            "GE",
173            "GF",
174            "GG",
175            "GH",
176            "GI",
177            "GL",
178            "GM",
179            "GN",
180            "GP",
181            "GQ",
182            "GR",
183            "GS",
184            "GT",
185            "GU",
186            "GW",
187            "GY",
188            "HK",
189            "HM",
190            "HN",
191            "HR",
192            "HT",
193            "HU",
194            "HV",
195            "IB",
196            "IC",
197            "ID",
198            "IE",
199            "IL",
200            "IM",
201            "IN",
202            "IO",
203            "IQ",
204            "IR",
205            "IS",
206            "IT",
207            "JA",
208            "JE",
209            "JM",
210            "JO",
211            "JP",
212            "JT",
213            "KE",
214            "KG",
215            "KH",
216            "KI",
217            "KM",
218            "KN",
219            "KP",
220            "KR",
221            "KW",
222            "KY",
223            "KZ",
224            "LA",
225            "LB",
226            "LC",
227            "LF",
228            "LI",
229            "LK",
230            "LR",
231            "LS",
232            "LT",
233            "LU",
234            "LV",
235            "LY",
236            "MA",
237            "MC",
238            "MD",
239            "ME",
240            "MF",
241            "MG",
242            "MH",
243            "MI",
244            "MK",
245            "ML",
246            "MM",
247            "MN",
248            "MO",
249            "MP",
250            "MQ",
251            "MR",
252            "MS",
253            "MT",
254            "MU",
255            "MV",
256            "MW",
257            "MX",
258            "MY",
259            "MZ",
260            "NA",
261            "NC",
262            "NE",
263            "NF",
264            "NG",
265            "NH",
266            "NI",
267            "NL",
268            "NO",
269            "NP",
270            "NQ",
271            "NR",
272            "NT",
273            "NU",
274            "NZ",
275            "OA",
276            "OM",
277            "PA",
278            "PC",
279            "PE",
280            "PF",
281            "PG",
282            "PH",
283            "PI",
284            "PK",
285            "PL",
286            "PM",
287            "PN",
288            "PR",
289            "PS",
290            "PT",
291            "PU",
292            "PW",
293            "PY",
294            "PZ",
295            "QA",
296            "QM",
297            "QN",
298            "QO",
299            "QP",
300            "QQ",
301            "QR",
302            "QS",
303            "QT",
304            "QU",
305            "QV",
306            "QW",
307            "QX",
308            "QY",
309            "QZ",
310            "RA",
311            "RB",
312            "RC",
313            "RE",
314            "RH",
315            "RI",
316            "RL",
317            "RM",
318            "RN",
319            "RO",
320            "RP",
321            "RS",
322            "RU",
323            "RW",
324            "SA",
325            "SB",
326            "SC",
327            "SD",
328            "SE",
329            "SF",
330            "SG",
331            "SH",
332            "SI",
333            "SJ",
334            "SK",
335            "SL",
336            "SM",
337            "SN",
338            "SO",
339            "SR",
340            "SS",
341            "ST",
342            "SU",
343            "SV",
344            "SX",
345            "SY",
346            "SZ",
347            "TA",
348            "TC",
349            "TD",
350            "TF",
351            "TG",
352            "TH",
353            "TJ",
354            "TK",
355            "TL",
356            "TM",
357            "TN",
358            "TO",
359            "TP",
360            "TR",
361            "TT",
362            "TV",
363            "TW",
364            "TZ",
365            "UA",
366            "UG",
367            "UK",
368            "UM",
369            "UN",
370            "US",
371            "UY",
372            "UZ",
373            "VA",
374            "VC",
375            "VD",
376            "VE",
377            "VG",
378            "VI",
379            "VN",
380            "VU",
381            "WF",
382            "WG",
383            "WK",
384            "WL",
385            "WO",
386            "WS",
387            "WV",
388            "XA",
389            "XB",
390            "XC",
391            "XD",
392            "XE",
393            "XF",
394            "XG",
395            "XH",
396            "XI",
397            "XJ",
398            "XK",
399            "XL",
400            "XM",
401            "XN",
402            "XO",
403            "XP",
404            "XQ",
405            "XR",
406            "XS",
407            "XT",
408            "XU",
409            "XV",
410            "XW",
411            "XX",
412            "XY",
413            "XZ",
414            "YD",
415            "YE",
416            "YT",
417            "YU",
418            "YV",
419            "ZA",
420            "ZM",
421            "ZR",
422            "ZW",
423            "ZZ",
424    };
425
426    static {
427        Arrays.sort(CCODES); // we cannot assume the codes are sorted
428        Arrays.sort(SPECIALS); // Just in case ...
429    }
430
431    /**
432     * Gets the singleton instance of the ISIN validator.
433     *
434     * @param checkCountryCode whether to check the country-code prefix or not
435     * @return A singleton instance of the appropriate ISIN validator.
436     */
437    public static ISINValidator getInstance(final boolean checkCountryCode) {
438        return checkCountryCode ? ISIN_VALIDATOR_TRUE : ISIN_VALIDATOR_FALSE;
439    }
440
441    private final boolean checkCountryCode;
442
443    private ISINValidator(final boolean checkCountryCode) {
444        this.checkCountryCode = checkCountryCode;
445    }
446
447    private boolean checkCode(final String code) {
448        return Arrays.binarySearch(CCODES, code) >= 0
449               ||
450               Arrays.binarySearch(SPECIALS, code) >= 0
451        ;
452    }
453
454    /**
455     * Tests whether the code is a valid ISIN code after any transformation
456     * by the validate routine.
457     *
458     * @param code The code to validate.
459     * @return {@code true} if a valid ISIN
460     * code, otherwise {@code false}.
461     */
462    public boolean isValid(final String code) {
463        final boolean valid = VALIDATOR.isValid(code);
464        if (valid && checkCountryCode) {
465            return checkCode(code.substring(0, 2));
466        }
467        return valid;
468    }
469
470    /**
471     * Checks the code is valid ISIN code.
472     *
473     * @param code The code to validate.
474     * @return A valid ISIN code if valid, otherwise {@code null}.
475     */
476    public Object validate(final String code) {
477        final Object validate = VALIDATOR.validate(code);
478        if (validate != null && checkCountryCode) {
479            return checkCode(code.substring(0, 2)) ? validate : null;
480        }
481        return validate;
482    }
483
484}