/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.fileupload2.core;

import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

/**
 * A simple parser intended to parse sequences of name/value pairs.
 * <p>
 * Parameter values are expected to be enclosed in quotes if they contain unsafe characters, such as '=' characters or separators. Parameter values are optional
 * and can be omitted.
 * </p>
 * <p>
 * {@code param1 = value; param2 = "anything goes; really"; param3}
 * </p>
 * <p>
 * The parser keeps its working state (input, cursor and token bounds) in instance fields that every {@code parse} call overwrites, so a single instance must
 * not be used by several threads at the same time.
 * </p>
 */
public class ParameterParser {

    /**
     * Characters being parsed (a private copy of the caller's array).
     */
    private char[] chars;

    /**
     * Current position in {@link #chars}.
     */
    private int pos;

    /**
     * Exclusive end position of the region being parsed.
     */
    private int len;

    /**
     * Start (inclusive) of the current token.
     */
    private int i1;

    /**
     * End (exclusive) of the current token.
     */
    private int i2;

    /**
     * Whether names stored in the map should be converted to lower case.
     */
    private boolean lowerCaseNames;

    /**
     * Default ParameterParser constructor.
     */
    public ParameterParser() {
    }

    /**
     * A helper method to process the parsed token. This method removes leading and trailing blanks as well as enclosing quotation marks, when necessary.
     *
     * @param quoted {@code true} if quotation marks are expected, {@code false} otherwise.
     * @return the token, or {@code null} if nothing remains after trimming (this includes an empty quoted string)
     */
    private String getToken(final boolean quoted) {
        // Trim leading white spaces
        while (i1 < i2 && Character.isWhitespace(chars[i1])) {
            i1++;
        }
        // Trim trailing white spaces
        while (i2 > i1 && Character.isWhitespace(chars[i2 - 1])) {
            i2--;
        }
        // Strip away quotation marks if necessary; both the opening and the closing quote must be present
        if (quoted && i2 - i1 >= 2 && chars[i1] == '"' && chars[i2 - 1] == '"') {
            i1++;
            i2--;
        }
        return i2 > i1 ? new String(chars, i1, i2 - i1) : null;
    }

    /**
     * Tests if there are any characters left to parse.
     *
     * @return {@code true} if there are unparsed characters, {@code false} otherwise.
     */
    private boolean hasChar() {
        return pos < len;
    }

    /**
     * Tests {@code true} if parameter names are to be converted to lower case when name/value pairs are parsed.
     *
     * @return {@code true} if parameter names are to be converted to lower case when name/value pairs are parsed. Otherwise returns {@code false}
     */
    public boolean isLowerCaseNames() {
        return lowerCaseNames;
    }

    /**
     * Tests if the given character is present in the array of characters.
     *
     * @param ch      the character to test for presence in the array of characters
     * @param charray the array of characters to test against
     * @return {@code true} if the character is present in the array of characters, {@code false} otherwise.
     */
    private static boolean isOneOf(final char ch, final char[] charray) {
        for (final char element : charray) {
            if (ch == element) {
                return true;
            }
        }
        return false;
    }

    /**
     * Parses a map of name/value pairs from the given array of characters. Names are expected to be unique.
     *
     * @param charArray the array of characters that contains a sequence of name/value pairs
     * @param separator the name/value pairs separator
     * @return a map of name/value pairs; empty if {@code charArray} is {@code null}
     */
    public Map<String, String> parse(final char[] charArray, final char separator) {
        if (charArray == null) {
            return new HashMap<>();
        }
        return parse(charArray, 0, charArray.length, separator);
    }

    /**
     * Parses a map of name/value pairs from the region {@code [offset, offset + length)} of the given array of characters. Names are expected to be unique; if
     * a name occurs more than once the last occurrence wins.
     * <p>
     * A parameter without a value (no {@code '='}, or nothing left after trimming and unquoting) is stored with a {@code null} value. Non-null values are
     * decoded with {@code RFC2231Utils.decodeText} when {@code RFC2231Utils.hasEncodedValue} accepts the parameter name, and with {@code MimeUtils.decodeText}
     * otherwise; if decoding fails with an {@link UnsupportedEncodingException} the raw value is kept.
     * </p>
     *
     * @param charArray the array of characters that contains a sequence of name/value pairs
     * @param offset    the index of the first character to parse.
     * @param length    the number of characters to parse, starting at {@code offset}. The region must lie within {@code charArray}.
     * @param separator the name/value pairs separator
     * @return a map of name/value pairs; empty if {@code charArray} is {@code null}
     */
    public Map<String, String> parse(final char[] charArray, final int offset, final int length, final char separator) {
        if (charArray == null) {
            return new HashMap<>();
        }
        final var params = new HashMap<String, String>();
        this.chars = charArray.clone();
        this.pos = offset;
        // 'len' is the exclusive end index, not a count: it must account for the offset,
        // otherwise a non-zero offset would truncate (or completely skip) the requested region.
        this.len = offset + length;

        // Terminator sets do not change while parsing, so build them once.
        final char[] nameTerminators = { '=', separator };
        final char[] valueTerminators = { separator };

        while (hasChar()) {
            String paramName = parseToken(nameTerminators);
            String paramValue = null;
            if (hasChar() && chars[pos] == '=') {
                pos++; // skip '='
                paramValue = parseQuotedToken(valueTerminators);

                if (paramValue != null) {
                    try {
                        paramValue = RFC2231Utils.hasEncodedValue(paramName) ? RFC2231Utils.decodeText(paramValue) : MimeUtils.decodeText(paramValue);
                    } catch (final UnsupportedEncodingException ignored) {
                        // let's keep the original value in this case
                    }
                }
            }
            if (hasChar() && chars[pos] == separator) {
                pos++; // skip separator
            }
            // Entries without a name (e.g. produced by consecutive separators) are dropped.
            if (paramName != null && !paramName.isEmpty()) {
                paramName = RFC2231Utils.stripDelimiter(paramName);
                if (this.lowerCaseNames) {
                    paramName = paramName.toLowerCase(Locale.ENGLISH);
                }
                params.put(paramName, paramValue);
            }
        }
        return params;
    }

    /**
     * Parses a map of name/value pairs from the given string. Names are expected to be unique.
     *
     * @param str       the string that contains a sequence of name/value pairs
     * @param separator the name/value pairs separator
     * @return a map of name/value pairs; empty if {@code str} is {@code null}
     */
    public Map<String, String> parse(final String str, final char separator) {
        if (str == null) {
            return new HashMap<>();
        }
        return parse(str.toCharArray(), separator);
    }

    /**
     * Parses a map of name/value pairs from the given string. Names are expected to be unique. Multiple separators may be specified and the earliest found in
     * the input string is used.
     *
     * @param str        the string that contains a sequence of name/value pairs
     * @param separators the name/value pairs separators
     * @return a map of name/value pairs; empty if {@code str} is {@code null} or if {@code separators} is {@code null} or empty
     */
    public Map<String, String> parse(final String str, final char[] separators) {
        if (separators == null || separators.length == 0) {
            return new HashMap<>();
        }
        // Default to the first separator; it is used when none of them occurs in the input.
        var separator = separators[0];
        if (str != null) {
            var idx = str.length();
            for (final char candidate : separators) {
                final var found = str.indexOf(candidate);
                if (found != -1 && found < idx) {
                    idx = found;
                    separator = candidate;
                }
            }
        }
        return parse(str, separator);
    }

    /**
     * Parses out a token until any of the given terminators is encountered outside the quotation marks.
     *
     * @param terminators the array of terminating characters. Any of these characters when encountered outside the quotation marks signify the end of the token
     * @return the token, or {@code null} if it is empty
     */
    private String parseQuotedToken(final char[] terminators) {
        i1 = pos;
        i2 = pos;
        var quoted = false;
        var charEscaped = false;
        while (hasChar()) {
            final char ch = chars[pos];
            if (!quoted && isOneOf(ch, terminators)) {
                break;
            }
            // An unescaped quote toggles the "inside quotes" state.
            if (!charEscaped && ch == '"') {
                quoted = !quoted;
            }
            // A backslash escapes the next character unless the backslash itself was escaped.
            charEscaped = !charEscaped && ch == '\\';
            i2++;
            pos++;
        }
        return getToken(true);
    }

    /**
     * Parses out a token until any of the given terminators is encountered.
     *
     * @param terminators the array of terminating characters. Any of these characters when encountered signify the end of the token
     * @return the token, or {@code null} if it is empty
     */
    private String parseToken(final char[] terminators) {
        i1 = pos;
        i2 = pos;
        while (hasChar()) {
            if (isOneOf(chars[pos], terminators)) {
                break;
            }
            i2++;
            pos++;
        }
        return getToken(false);
    }

    /**
     * Sets the flag if parameter names are to be converted to lower case when name/value pairs are parsed.
     *
     * @param lowerCaseNames {@code true} if parameter names are to be converted to lower case when name/value pairs are parsed. {@code false} otherwise.
     */
    public void setLowerCaseNames(final boolean lowerCaseNames) {
        this.lowerCaseNames = lowerCaseNames;
    }

}