1 package org.apache.velocity.io;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.io.PushbackInputStream;
26
27 import org.apache.velocity.util.ExceptionUtils;
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
/**
 * An {@link InputStream} wrapper that inspects the first bytes of the wrapped
 * stream for a Unicode byte order mark (BOM).
 *
 * <p>The BOMs for UTF-8, UTF-16LE, UTF-16BE, UTF-32LE and UTF-32BE are
 * recognized. The detection runs once, in the constructor; afterwards the bytes
 * that were read for the detection are pushed back so that regular reads see
 * them again. When {@code skipBOM} is {@code true} and a BOM was recognized,
 * the BOM bytes themselves are not pushed back and are therefore hidden from
 * the reader.</p>
 *
 * <p>All stream operations delegate to an internal {@link PushbackInputStream}
 * wrapped around the stream passed to the constructor.</p>
 */
public class UnicodeInputStream
    extends InputStream
{
    /** BOM marker for UTF-8: {@code EF BB BF}. */
    public static final UnicodeBOM UTF8_BOM =
        new UnicodeBOM("UTF-8", new byte [] { (byte)0xef, (byte)0xbb, (byte)0xbf });

    /** BOM marker for UTF-16, little endian: {@code FF FE}. */
    public static final UnicodeBOM UTF16LE_BOM =
        new UnicodeBOM("UTF-16LE", new byte [] { (byte)0xff, (byte)0xfe });

    /** BOM marker for UTF-16, big endian: {@code FE FF}. */
    public static final UnicodeBOM UTF16BE_BOM =
        new UnicodeBOM("UTF-16BE", new byte [] { (byte)0xfe, (byte)0xff });

    /**
     * BOM marker for UTF-32, little endian: {@code FF FE 00 00}.
     *
     * <p>Its first two bytes are identical to {@link #UTF16LE_BOM}, which is why
     * {@link #readEncoding()} only tries this marker after UTF-16LE has matched.</p>
     */
    public static final UnicodeBOM UTF32LE_BOM =
        new UnicodeBOM("UTF-32LE", new byte [] { (byte)0xff, (byte)0xfe, (byte)0x00, (byte)0x00 });

    /** BOM marker for UTF-32, big endian: {@code 00 00 FE FF}. */
    public static final UnicodeBOM UTF32BE_BOM =
        new UnicodeBOM("UTF-32BE", new byte [] { (byte)0x00, (byte)0x00, (byte)0xfe, (byte)0xff });

    /** Length in bytes of the longest BOM that is recognized. */
    private static final int MAX_BOM_SIZE = 4;

    /** Bytes read from the stream while looking for a BOM. */
    private final byte [] buf = new byte[MAX_BOM_SIZE];

    /** Number of valid bytes currently held in {@link #buf}. */
    private int pos = 0;

    /** Encoding name derived from the BOM, or {@code null} if no BOM was found. */
    private final String encoding;

    /** Whether a recognized BOM is hidden from readers of this stream. */
    private final boolean skipBOM;

    /** The wrapped stream; pushback capacity is {@link #MAX_BOM_SIZE} bytes. */
    private final PushbackInputStream inputStream;

    /**
     * Creates a stream that detects a BOM on the given stream and skips it.
     * Equivalent to {@code new UnicodeInputStream(inputStream, true)}.
     *
     * @param inputStream the stream to inspect and read from
     * @throws IllegalStateException if reading the leading bytes fails
     * @throws IOException declared for callers; read failures during BOM
     *         detection are reported as {@link IllegalStateException}
     */
    public UnicodeInputStream(final InputStream inputStream)
        throws IllegalStateException, IOException
    {
        this(inputStream, true);
    }

    /**
     * Creates a stream that detects a BOM on the given stream.
     *
     * @param inputStream the stream to inspect and read from
     * @param skipBOM {@code true} to drop a recognized BOM from the data seen by
     *        readers, {@code false} to leave it in the stream
     * @throws IllegalStateException if reading the leading bytes fails; the
     *         underlying {@link IOException} is attached as the cause
     * @throws IOException declared for callers; read failures during BOM
     *         detection are reported as {@link IllegalStateException}
     */
    public UnicodeInputStream(final InputStream inputStream, boolean skipBOM)
        throws IllegalStateException, IOException
    {
        super();

        this.skipBOM = skipBOM;
        this.inputStream = new PushbackInputStream(inputStream, MAX_BOM_SIZE);

        try
        {
            this.encoding = readEncoding();
        }
        catch (IOException ioe)
        {
            IllegalStateException ex = new IllegalStateException("Could not read BOM from Stream");
            // Keep the original failure reachable through getCause().
            ex.initCause(ioe);
            throw ex;
        }
    }

    /**
     * Reports whether a recognized BOM is skipped.
     *
     * @return the {@code skipBOM} value this stream was created with
     */
    public boolean isSkipBOM()
    {
        return skipBOM;
    }

    /**
     * Returns the encoding that was derived from the BOM at construction time.
     *
     * @return one of {@code "UTF-8"}, {@code "UTF-16LE"}, {@code "UTF-16BE"},
     *         {@code "UTF-32LE"}, {@code "UTF-32BE"}, or {@code null} if the
     *         stream did not start with a recognized BOM
     */
    public String getEncodingFromStream()
    {
        return encoding;
    }

    /**
     * Reads up to {@link #MAX_BOM_SIZE} bytes from the wrapped stream, tries to
     * match them against the known BOMs and pushes the bytes back afterwards
     * (excluding a matched BOM when {@code skipBOM} is set).
     *
     * <p>The first byte selects the candidate BOM. A leading {@code FF} is first
     * matched as UTF-16LE and, if that succeeds, extended to UTF-32LE; when the
     * extension fails the UTF-16LE result is kept.</p>
     *
     * @return the encoding name of the matched BOM, or {@code null} if none matched
     * @throws IOException if reading from or pushing back to the wrapped stream fails
     */
    protected String readEncoding()
        throws IOException
    {
        pos = 0;

        UnicodeBOM detected = null;

        // An empty stream yields no first byte and therefore no BOM.
        if (readByte())
        {
            switch (buf[0])
            {
                case (byte)0x00:
                    detected = match(UTF32BE_BOM, null);
                    break;
                case (byte)0xef:
                    detected = match(UTF8_BOM, null);
                    break;
                case (byte)0xfe:
                    detected = match(UTF16BE_BOM, null);
                    break;
                case (byte)0xff:
                    detected = match(UTF16LE_BOM, null);

                    // FF FE may be the start of FF FE 00 00; fall back to
                    // UTF-16LE if the two trailing zero bytes are not there.
                    if (detected != null)
                    {
                        detected = match(UTF32LE_BOM, detected);
                    }
                    break;

                default:
                    detected = null;
                    break;
            }
        }

        pushback(detected);

        return (detected != null) ? detected.getEncoding() : null;
    }

    /**
     * Compares the buffered bytes with the given BOM, reading further bytes from
     * the stream as needed.
     *
     * @param matchEncoding the BOM to test; returned when every byte matches
     * @param noMatchEncoding the value returned when a byte differs or the
     *        stream ends before the BOM is complete
     * @return {@code matchEncoding} on a full match, otherwise {@code noMatchEncoding}
     * @throws IOException if reading from the wrapped stream fails
     */
    private UnicodeBOM match(final UnicodeBOM matchEncoding, final UnicodeBOM noMatchEncoding)
        throws IOException
    {
        byte [] bom = matchEncoding.getBytes();

        for (int i = 0; i < bom.length; i++)
        {
            // Only read when byte i has not been buffered by an earlier step.
            if (pos <= i && !readByte())
            {
                return noMatchEncoding;
            }

            if (bom[i] != buf[i])
            {
                return noMatchEncoding;
            }
        }

        return matchEncoding;
    }

    /**
     * Reads one byte from the wrapped stream into {@link #buf}.
     *
     * @return {@code true} if a byte was stored, {@code false} at end of stream
     * @throws IOException if the buffer is already full or the read fails
     */
    private boolean readByte()
        throws IOException
    {
        // Check capacity before touching the stream so that the error path
        // never consumes a byte that could not be stored or pushed back.
        if (pos >= buf.length)
        {
            throw new IOException("BOM read error");
        }

        int res = inputStream.read();
        if (res == -1)
        {
            return false;
        }

        buf[pos++] = (byte) res;
        return true;
    }

    /**
     * Returns the buffered bytes to the wrapped stream. When a BOM matched and
     * {@code skipBOM} is set, only the bytes following the BOM are returned.
     *
     * @param matchBOM the BOM that was recognized, or {@code null} for none
     * @throws IOException if the wrapped stream rejects the pushback
     * @throws IllegalStateException if the matched BOM is longer than the
     *         number of bytes that were buffered
     */
    private void pushback(final UnicodeBOM matchBOM)
        throws IOException
    {
        int count = pos;
        int start = 0;

        if (matchBOM != null && skipBOM)
        {
            // Leave the BOM bytes out; push back only what was read past them.
            start = matchBOM.getBytes().length;
            count = (pos - start);

            if (count < 0)
            {
                throw new IllegalStateException("Match has more bytes than available!");
            }
        }

        inputStream.unread(buf, start, count);
    }

    /**
     * Closes the wrapped stream.
     *
     * @throws IOException if closing fails
     */
    public void close()
        throws IOException
    {
        inputStream.close();
    }

    /**
     * Delegates to the wrapped stream.
     *
     * @return the value reported by the wrapped stream
     * @throws IOException if the wrapped stream fails
     */
    public int available()
        throws IOException
    {
        return inputStream.available();
    }

    /**
     * Delegates to the wrapped stream.
     *
     * @param readlimit passed through unchanged
     */
    public void mark(final int readlimit)
    {
        inputStream.mark(readlimit);
    }

    /**
     * Delegates to the wrapped stream.
     *
     * @return the value reported by the wrapped stream
     */
    public boolean markSupported()
    {
        return inputStream.markSupported();
    }

    /**
     * Reads the next byte, delegating to the wrapped stream.
     *
     * @return the byte as a value from 0 to 255, or -1 at end of stream
     * @throws IOException if the wrapped stream fails
     */
    public int read()
        throws IOException
    {
        return inputStream.read();
    }

    /**
     * Reads bytes into the given array, delegating to the wrapped stream.
     *
     * @param b destination array
     * @return the value reported by the wrapped stream
     * @throws IOException if the wrapped stream fails
     */
    public int read(final byte [] b)
        throws IOException
    {
        return inputStream.read(b);
    }

    /**
     * Reads bytes into part of the given array, delegating to the wrapped stream.
     *
     * @param b destination array
     * @param off offset in {@code b} at which to start storing
     * @param len maximum number of bytes to read
     * @return the value reported by the wrapped stream
     * @throws IOException if the wrapped stream fails
     */
    public int read(final byte [] b, final int off, final int len)
        throws IOException
    {
        return inputStream.read(b, off, len);
    }

    /**
     * Delegates to the wrapped stream.
     *
     * @throws IOException if the wrapped stream fails or does not support reset
     */
    public void reset()
        throws IOException
    {
        inputStream.reset();
    }

    /**
     * Skips bytes, delegating to the wrapped stream.
     *
     * @param n number of bytes to skip
     * @return the value reported by the wrapped stream
     * @throws IOException if the wrapped stream fails
     */
    public long skip(final long n)
        throws IOException
    {
        return inputStream.skip(n);
    }

    /**
     * Pairs an encoding name with the byte sequence of its byte order mark.
     */
    static final class UnicodeBOM
    {
        /** Encoding name reported when this BOM matches. */
        private final String encoding;

        /** The BOM byte sequence. */
        private final byte [] bytes;

        private UnicodeBOM(final String encoding, final byte [] bytes)
        {
            this.encoding = encoding;
            this.bytes = bytes;
        }

        /**
         * @return the encoding name associated with this BOM
         */
        String getEncoding()
        {
            return encoding;
        }

        /**
         * @return the internal BOM byte array (not a copy; callers must not modify it)
         */
        byte [] getBytes()
        {
            return bytes;
        }
    }
}