diff --git a/app/src/main/java/io/legado/app/ui/book/read/page/provider/ChapterProvider.kt b/app/src/main/java/io/legado/app/ui/book/read/page/provider/ChapterProvider.kt index 4712f94b3..dfeb4cb6d 100644 --- a/app/src/main/java/io/legado/app/ui/book/read/page/provider/ChapterProvider.kt +++ b/app/src/main/java/io/legado/app/ui/book/read/page/provider/ChapterProvider.kt @@ -118,12 +118,12 @@ object ChapterProvider { val matcher = AppPattern.imgPattern.matcher(text) if (matcher.find()) { matcher.group(1)?.let { src -> - if (!book.isEpub()) { + //if (!book.isEpub()) { durY = setTypeImage( book, bookChapter, src, durY, textPages, book.getImageStyle() ) - } + //} } } else { val isTitle = index == 0 diff --git a/epublib/src/main/java/me/ag2s/epublib/util/IOUtil.java b/epublib/src/main/java/me/ag2s/epublib/util/IOUtil.java index 8bb4bf8cc..e75d91f70 100644 --- a/epublib/src/main/java/me/ag2s/epublib/util/IOUtil.java +++ b/epublib/src/main/java/me/ag2s/epublib/util/IOUtil.java @@ -1,6 +1,5 @@ package me.ag2s.epublib.util; -import java.io.BufferedInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; @@ -12,154 +11,204 @@ import java.io.Writer; /** * Most of the functions herein are re-implementations of the ones in * apache io IOUtils. - * + *

* The reason for re-implementing this is that the functions are fairly simple * and using my own implementation saves the inclusion of a 200Kb jar file. */ public class IOUtil { - public static final int IO_COPY_BUFFER_SIZE = 1024 * 4; + /** + * Represents the end-of-file (or stream). + * @since 2.5 (made public) + */ + public static final int EOF = -1; - /** - * Gets the contents of the Reader as a byte[], with the given character encoding. - * - * @param in g - * @param encoding g - * @return the contents of the Reader as a byte[], with the given character encoding. - * @throws IOException g - */ - public static byte[] toByteArray(Reader in, String encoding) - throws IOException { - StringWriter out = new StringWriter(); - copy(in, out); - out.flush(); - return out.toString().getBytes(encoding); - } + public static final int IO_COPY_BUFFER_SIZE = 1024 * 8; + public static final int DEFAULT_BUFFER_SIZE = 8192; - /** - * Returns the contents of the InputStream as a byte[] - * - * @param in f - * @return the contents of the InputStream as a byte[] - * @throws IOException f - */ - public static byte[] toByteArray(InputStream in) throws IOException { - ByteArrayOutputStream result = new ByteArrayOutputStream(); - copy(in, result); - result.flush(); - return result.toByteArray(); - } - - /** - * Reads data from the InputStream, using the specified buffer size. - * - * This is meant for situations where memory is tight, since - * it prevents buffer expansion. 
- * - * @param in the stream to read data from - * @param size the size of the array to create - * @return the array, or null - * @throws IOException f - */ - public static byte[] toByteArray(InputStream in, int size) - throws IOException { - - try { - ByteArrayOutputStream result; - - if (size > 0) { - result = new ByteArrayOutputStream(size); - } else { - result = new ByteArrayOutputStream(); - } - - copy(in, result); - result.flush(); - return result.toByteArray(); - } catch (OutOfMemoryError error) { - //Return null so it gets loaded lazily. - return null; + /** + * Gets the contents of the Reader as a byte[], with the given character encoding. + * + * @param in g + * @param encoding g + * @return the contents of the Reader as a byte[], with the given character encoding. + * @throws IOException g + */ + public static byte[] toByteArray(Reader in, String encoding) + throws IOException { + StringWriter out = new StringWriter(); + copy(in, out); + out.flush(); + return out.toString().getBytes(encoding); } - } + /** + * Returns the contents of the InputStream as a byte[] + * + * @param in f + * @return the contents of the InputStream as a byte[] + * @throws IOException f + */ + public static byte[] toByteArray(InputStream in) throws IOException { + ByteArrayOutputStream result = new ByteArrayOutputStream(); + copy(in, result); + result.flush(); + return result.toByteArray(); + } + + /** + * Reads data from the InputStream, using the specified buffer size. + *

+ * This is meant for situations where memory is tight, since + * it prevents buffer expansion. + * + * @param in the stream to read data from + * @param size the size of the array to create + * @return the array, or null + * @throws IOException f + */ + public static byte[] toByteArray(InputStream in, int size) + throws IOException { + + try { + ByteArrayOutputStream result; + + if (size > 0) { + result = new ByteArrayOutputStream(size); + } else { + result = new ByteArrayOutputStream(); + } + + copy(in, result); + result.flush(); + return result.toByteArray(); + } catch (OutOfMemoryError error) { + //Return null so it gets loaded lazily. + return null; + } + + } - /** - * if totalNrRead < 0 then totalNrRead is returned, if - * (nrRead + totalNrRead) < Integer.MAX_VALUE then nrRead + totalNrRead - * is returned, -1 otherwise. - * - * @param nrRead f - * @param totalNrNread f - * @return if totalNrRead < 0 then totalNrRead is returned, if - * (nrRead + totalNrRead) < Integer.MAX_VALUE then nrRead + totalNrRead - * is returned, -1 otherwise. - */ - protected static int calcNewNrReadSize(int nrRead, int totalNrNread) { - if (totalNrNread < 0) { - return totalNrNread; + /** + * if totalNrRead < 0 then totalNrRead is returned, if + * (nrRead + totalNrRead) < Integer.MAX_VALUE then nrRead + totalNrRead + * is returned, -1 otherwise. + * + * @param nrRead f + * @param totalNrNread f + * @return if totalNrRead < 0 then totalNrRead is returned, if + * (nrRead + totalNrRead) < Integer.MAX_VALUE then nrRead + totalNrRead + * is returned, -1 otherwise. 
+ */ + protected static int calcNewNrReadSize(int nrRead, int totalNrNread) { + if (totalNrNread < 0) { + return totalNrNread; + } + if (totalNrNread > (Integer.MAX_VALUE - nrRead)) { + return -1; + } else { + return (totalNrNread + nrRead); + } } - if (totalNrNread > (Integer.MAX_VALUE - nrRead)) { - return -1; - } else { - return (totalNrNread + nrRead); - } - } - /** - * Copies the contents of the InputStream to the OutputStream. - * - * @param in f - * @param out f - * @return the nr of bytes read, or -1 if the amount > Integer.MAX_VALUE - * @throws IOException f - */ - public static int copy(InputStream in, OutputStream out) - throws IOException { - byte[] buffer = new byte[IO_COPY_BUFFER_SIZE]; - int readSize ; - int result = 0; - while ((readSize = in.read(buffer)) >= 0) { - out.write(buffer, 0, readSize); - result = calcNewNrReadSize(readSize, result); + /** + * Copies the contents of the InputStream to the OutputStream. + * + * @param in f + * @param out f + * @return the nr of bytes read, or -1 if the amount > Integer.MAX_VALUE + * @throws IOException f + */ + public static int copy(InputStream in, OutputStream out) + throws IOException { + byte[] buffer = new byte[IO_COPY_BUFFER_SIZE]; + int readSize; + int result = 0; + while ((readSize = in.read(buffer)) >= 0) { + out.write(buffer, 0, readSize); + result = calcNewNrReadSize(readSize, result); + } + out.flush(); + return result; } - out.flush(); - return result; - } - /** - * Copies the contents of the Reader to the Writer. - * - * @param in f - * @param out f - * @return the nr of characters read, or -1 if the amount > Integer.MAX_VALUE - * @throws IOException f - */ - public static int copy(Reader in, Writer out) throws IOException { - char[] buffer = new char[IO_COPY_BUFFER_SIZE]; - int readSize; - int result = 0; - while ((readSize = in.read(buffer)) >= 0) { - out.write(buffer, 0, readSize); - result = calcNewNrReadSize(readSize, result); + /** + * Copies the contents of the Reader to the Writer. 
+ * + * @param in f + * @param out f + * @return the nr of characters read, or -1 if the amount > Integer.MAX_VALUE + * @throws IOException f + */ + public static int copy(Reader in, Writer out) throws IOException { + char[] buffer = new char[IO_COPY_BUFFER_SIZE]; + int readSize; + int result = 0; + while ((readSize = in.read(buffer)) >= 0) { + out.write(buffer, 0, readSize); + result = calcNewNrReadSize(readSize, result); + } + out.flush(); + return result; + } + /** + * Returns the length of the given array in a null-safe manner. + * + * @param array an array or null + * @return the array length -- or 0 if the given array is null. + * @since 2.7 + */ + public static int length(final byte[] array) { + return array == null ? 0 : array.length; } - out.flush(); - return result; - } - public static String Stream2String(InputStream inputStream) { - String str; - try { - BufferedInputStream bis = new BufferedInputStream(inputStream); - ByteArrayOutputStream buf = new ByteArrayOutputStream(); - for (int result = bis.read(); result != -1; result = bis.read()) { - buf.write((byte) result); - } - str=buf.toString("UTF-8"); - }catch (Exception e){ - str=null; + /** + * Returns the length of the given array in a null-safe manner. + * + * @param array an array or null + * @return the array length -- or 0 if the given array is null. + * @since 2.7 + */ + public static int length(final char[] array) { + return array == null ? 0 : array.length; + } + + /** + * Returns the length of the given CharSequence in a null-safe manner. + * + * @param csq a CharSequence or null + * @return the CharSequence length -- or 0 if the given CharSequence is null. + * @since 2.7 + */ + public static int length(final CharSequence csq) { + return csq == null ? 0 : csq.length(); + } + + /** + * Returns the length of the given array in a null-safe manner. + * + * @param array an array or null + * @return the array length -- or 0 if the given array is null. 
+ * @since 2.7 + */ + public static int length(final Object[] array) { + return array == null ? 0 : array.length; + } + + @SuppressWarnings("unused") + public static String Stream2String(InputStream inputStream) { + ByteArrayOutputStream result = new ByteArrayOutputStream(); + try { + byte[] buffer = new byte[DEFAULT_BUFFER_SIZE]; + int length; + while ((length = inputStream.read(buffer)) != -1) { + result.write(buffer, 0, length); + } + return result.toString(); + } catch (Exception e) { + return e.getLocalizedMessage(); + } + } -// StandardCharsets.UTF_8.name() > JDK 7 - return str; - } } diff --git a/epublib/src/main/java/me/ag2s/epublib/util/ResourceUtil.java b/epublib/src/main/java/me/ag2s/epublib/util/ResourceUtil.java index faf42755f..9b8f7896e 100644 --- a/epublib/src/main/java/me/ag2s/epublib/util/ResourceUtil.java +++ b/epublib/src/main/java/me/ag2s/epublib/util/ResourceUtil.java @@ -75,16 +75,15 @@ public class ResourceUtil { } } - String html = ""; - html += ""; - html += "" + title + "" + + + return "" + + "" + + "" + title + "" + "" + - ""; - html += "

" + title + "

" + + "" + + "

" + title + "

" + body + ""; - - return html; } /** diff --git a/epublib/src/main/java/me/ag2s/epublib/util/StringUtil.java b/epublib/src/main/java/me/ag2s/epublib/util/StringUtil.java index acb3093e9..929278b98 100644 --- a/epublib/src/main/java/me/ag2s/epublib/util/StringUtil.java +++ b/epublib/src/main/java/me/ag2s/epublib/util/StringUtil.java @@ -273,21 +273,25 @@ public class StringUtil { } return text.substring(cPos + 1); } + // 移除字符串首尾空字符的高效方法(利用ASCII值判断,包括全角空格) public static String FixTrim(String s) { - if (s==null){ + if (s == null || s.isEmpty()) { return ""; } - Pattern r = Pattern.compile("^[\\s]{1,9}(.*?)[\\s]{1,9}$"); - Matcher m = r.matcher(s); - if (m.find()) { - s= m.group(1); + int start = 0; + int len = s.length(); + int end = len - 1; + while (start < end && (s.charAt(start) <= 0x20 || s.charAt(start) == ' ')) { + ++start; } - if(s==null){ - return ""; + while (start < end && (s.charAt(end) <= 0x20 || s.charAt(end) == ' ')) { + --end; } - //移除GBK中文全角空格 - s = s.replace(" ", ""); - return s; + if (end < len) { + ++end; + } + return (start > 0 || end < len) ? s.substring(start, end) : s; + } } diff --git a/epublib/src/main/java/me/ag2s/epublib/util/commons/io/BOMInputStream.java b/epublib/src/main/java/me/ag2s/epublib/util/commons/io/BOMInputStream.java index d3b1f6d9e..ed299f591 100644 --- a/epublib/src/main/java/me/ag2s/epublib/util/commons/io/BOMInputStream.java +++ b/epublib/src/main/java/me/ag2s/epublib/util/commons/io/BOMInputStream.java @@ -16,62 +16,85 @@ */ package me.ag2s.epublib.util.commons.io; + + +import android.os.Build; + import java.io.IOException; import java.io.InputStream; import java.util.Arrays; +import java.util.Comparator; import java.util.List; +import me.ag2s.epublib.util.IOUtil; + +import static me.ag2s.epublib.util.IOUtil.EOF; + + /** - * This class is used to wrap a stream that includes an encoded - * {@link ByteOrderMark} as its first bytes. 
+ * This class is used to wrap a stream that includes an encoded {@link ByteOrderMark} as its first bytes. * - * This class detects these bytes and, if required, can automatically skip them - * and return the subsequent byte as the first byte in the stream. + * This class detects these bytes and, if required, can automatically skip them and return the subsequent byte as the + * first byte in the stream. * * The {@link ByteOrderMark} implementation has the following pre-defined BOMs: * * * - *

Example 1 - Detect and exclude a UTF-8 BOM

+ *

Example 1 - Detect and exclude a UTF-8 BOM

+ * *
- *      BOMInputStream bomIn = new BOMInputStream(in);
- *      if (bomIn.hasBOM()) {
- *          // has a UTF-8 BOM
- *      }
+ * BOMInputStream bomIn = new BOMInputStream(in);
+ * if (bomIn.hasBOM()) {
+ *     // has a UTF-8 BOM
+ * }
  * 
* - *

Example 2 - Detect a UTF-8 BOM (but don't exclude it)

+ *

Example 2 - Detect a UTF-8 BOM (but don't exclude it)

+ * *
- *      boolean include = true;
- *      BOMInputStream bomIn = new BOMInputStream(in, include);
- *      if (bomIn.hasBOM()) {
- *          // has a UTF-8 BOM
- *      }
+ * boolean include = true;
+ * BOMInputStream bomIn = new BOMInputStream(in, include);
+ * if (bomIn.hasBOM()) {
+ *     // has a UTF-8 BOM
+ * }
  * 
* - *

Example 3 - Detect Multiple BOMs

+ *

Example 3 - Detect Multiple BOMs

+ * *
- *      BOMInputStream bomIn = new BOMInputStream(in, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE);
- *      if (bomIn.hasBOM() == false) {
- *          // No BOM found
- *      } else if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) {
- *          // has a UTF-16LE BOM
- *      } else if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) {
- *          // has a UTF-16BE BOM
- *      }
+ * BOMInputStream bomIn = new BOMInputStream(in,
+ *   ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
+ *   ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE
+ *   );
+ * if (bomIn.hasBOM() == false) {
+ *     // No BOM found
+ * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) {
+ *     // has a UTF-16LE BOM
+ * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) {
+ *     // has a UTF-16BE BOM
+ * } else if (bomIn.hasBOM(ByteOrderMark.UTF_32LE)) {
+ *     // has a UTF-32LE BOM
+ * } else if (bomIn.hasBOM(ByteOrderMark.UTF_32BE)) {
+ *     // has a UTF-32BE BOM
+ * }
  * 
* * @see ByteOrderMark * @see Wikipedia - Byte Order Mark - * @version $Revision: 1052095 $ $Date: 2010-12-22 23:03:20 +0000 (Wed, 22 Dec 2010) $ - * @since Commons IO 2.0 + * @since 2.0 */ public class BOMInputStream extends ProxyInputStream { private final boolean include; + /** + * BOMs are sorted from longest to shortest. + */ private final List boms; private ByteOrderMark byteOrderMark; private int[] firstBytes; @@ -81,112 +104,138 @@ public class BOMInputStream extends ProxyInputStream { private boolean markedAtStart; /** - * Constructs a new BOM InputStream that excludes - * a {@link ByteOrderMark#UTF_8} BOM. - * @param delegate the InputStream to delegate to + * Constructs a new BOM InputStream that excludes a {@link ByteOrderMark#UTF_8} BOM. + * + * @param delegate + * the InputStream to delegate to */ @SuppressWarnings("unused") - public BOMInputStream(InputStream delegate) { + public BOMInputStream(final InputStream delegate) { this(delegate, false, ByteOrderMark.UTF_8); } /** - * Constructs a new BOM InputStream that detects a - * a {@link ByteOrderMark#UTF_8} and optionally includes it. - * @param delegate the InputStream to delegate to - * @param include true to include the UTF-8 BOM or - * false to exclude it + * Constructs a new BOM InputStream that detects a a {@link ByteOrderMark#UTF_8} and optionally includes it. + * + * @param delegate + * the InputStream to delegate to + * @param include + * true to include the UTF-8 BOM or false to exclude it */ @SuppressWarnings("unused") - public BOMInputStream(InputStream delegate, boolean include) { + public BOMInputStream(final InputStream delegate, final boolean include) { this(delegate, include, ByteOrderMark.UTF_8); } /** - * Constructs a new BOM InputStream that excludes - * the specified BOMs. - * @param delegate the InputStream to delegate to - * @param boms The BOMs to detect and exclude + * Constructs a new BOM InputStream that excludes the specified BOMs. 
+ * + * @param delegate + * the InputStream to delegate to + * @param boms + * The BOMs to detect and exclude */ @SuppressWarnings("unused") - public BOMInputStream(InputStream delegate, ByteOrderMark... boms) { + public BOMInputStream(final InputStream delegate, final ByteOrderMark... boms) { this(delegate, false, boms); } /** - * Constructs a new BOM InputStream that detects the - * specified BOMs and optionally includes them. - * @param delegate the InputStream to delegate to - * @param include true to include the specified BOMs or - * false to exclude them - * @param boms The BOMs to detect and optionally exclude + * Compares ByteOrderMark objects in descending length order. */ - @SuppressWarnings("unused") - public BOMInputStream(InputStream delegate, boolean include, ByteOrderMark... boms) { + private static final Comparator ByteOrderMarkLengthComparator = (bom1, bom2) -> { + final int len1 = bom1.length(); + final int len2 = bom2.length(); + return Integer.compare(len2, len1); + }; + + /** + * Constructs a new BOM InputStream that detects the specified BOMs and optionally includes them. + * + * @param delegate + * the InputStream to delegate to + * @param include + * true to include the specified BOMs or false to exclude them + * @param boms + * The BOMs to detect and optionally exclude + */ + public BOMInputStream(final InputStream delegate, final boolean include, final ByteOrderMark... boms) { super(delegate); - if (boms == null || boms.length == 0) { + if (IOUtil.length(boms) == 0) { throw new IllegalArgumentException("No BOMs specified"); } this.include = include; - this.boms = Arrays.asList(boms); + final List list = Arrays.asList(boms); + // Sort the BOMs to match the longest BOM first because some BOMs have the same starting two bytes. + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.N) { + list.sort(ByteOrderMarkLengthComparator); + } + this.boms = list; + } /** * Indicates whether the stream contains one of the specified BOMs. 
* - * @return true if the stream has one of the specified BOMs, otherwise false - * if it does not - * @throws IOException if an error reading the first bytes of the stream occurs + * @return true if the stream has one of the specified BOMs, otherwise false if it does not + * @throws IOException + * if an error reading the first bytes of the stream occurs */ @SuppressWarnings("unused") public boolean hasBOM() throws IOException { - return (getBOM() != null); + return getBOM() != null; } /** * Indicates whether the stream contains the specified BOM. * - * @param bom The BOM to check for - * @return true if the stream has the specified BOM, otherwise false - * if it does not - * @throws IllegalArgumentException if the BOM is not one the stream - * is configured to detect - * @throws IOException if an error reading the first bytes of the stream occurs + * @param bom + * The BOM to check for + * @return true if the stream has the specified BOM, otherwise false if it does not + * @throws IllegalArgumentException + * if the BOM is not one the stream is configured to detect + * @throws IOException + * if an error reading the first bytes of the stream occurs */ @SuppressWarnings("unused") - public boolean hasBOM(ByteOrderMark bom) throws IOException { + public boolean hasBOM(final ByteOrderMark bom) throws IOException { if (!boms.contains(bom)) { throw new IllegalArgumentException("Stream not configure to detect " + bom); } - return (byteOrderMark != null && getBOM().equals(bom)); + getBOM(); + return byteOrderMark != null && byteOrderMark.equals(bom); } /** * Return the BOM (Byte Order Mark). 
* * @return The BOM or null if none - * @throws IOException if an error reading the first bytes of the stream occurs + * @throws IOException + * if an error reading the first bytes of the stream occurs */ public ByteOrderMark getBOM() throws IOException { if (firstBytes == null) { - int max = 0; - for (ByteOrderMark bom : boms) { - max = Math.max(max, bom.length()); - } - firstBytes = new int[max]; + fbLength = 0; + // BOMs are sorted from longest to shortest + final int maxBomSize = boms.get(0).length(); + firstBytes = new int[maxBomSize]; + // Read first maxBomSize bytes for (int i = 0; i < firstBytes.length; i++) { firstBytes[i] = in.read(); fbLength++; if (firstBytes[i] < 0) { break; } - - byteOrderMark = find(); - if (byteOrderMark != null) { - if (!include) { + } + // match BOM in firstBytes + byteOrderMark = find(); + if (byteOrderMark != null) { + if (!include) { + if (byteOrderMark.length() < firstBytes.length) { + fbIndex = byteOrderMark.length(); + } else { fbLength = 0; } - break; } } } @@ -197,25 +246,27 @@ public class BOMInputStream extends ProxyInputStream { * Return the BOM charset Name - {@link ByteOrderMark#getCharsetName()}. * * @return The BOM charset Name or null if no BOM found - * @throws IOException if an error reading the first bytes of the stream occurs - * + * @throws IOException + * if an error reading the first bytes of the stream occurs + * */ public String getBOMCharsetName() throws IOException { getBOM(); - return (byteOrderMark == null ? null : byteOrderMark.getCharsetName()); + return byteOrderMark == null ? null : byteOrderMark.getCharsetName(); } /** - * This method reads and either preserves or skips the first bytes in the - * stream. It behaves like the single-byte read() method, - * either returning a valid byte or -1 to indicate that the initial bytes - * have been processed already. + * This method reads and either preserves or skips the first bytes in the stream. 
It behaves like the single-byte + * read() method, either returning a valid byte or -1 to indicate that the initial bytes have been + * processed already. + * * @return the byte read (excluding BOM) or -1 if the end of stream - * @throws IOException if an I/O error occurs + * @throws IOException + * if an I/O error occurs */ private int readFirstBytes() throws IOException { getBOM(); - return (fbIndex < fbLength) ? firstBytes[fbIndex++] : -1; + return fbIndex < fbLength ? firstBytes[fbIndex++] : EOF; } /** @@ -224,7 +275,7 @@ public class BOMInputStream extends ProxyInputStream { * @return The matched BOM or null if none matched */ private ByteOrderMark find() { - for (ByteOrderMark bom : boms) { + for (final ByteOrderMark bom : boms) { if (matches(bom)) { return bom; } @@ -235,13 +286,15 @@ public class BOMInputStream extends ProxyInputStream { /** * Check if the bytes match a BOM. * - * @param bom The BOM + * @param bom + * The BOM * @return true if the bytes match the bom, otherwise false */ - private boolean matches(ByteOrderMark bom) { - if (bom.length() != fbLength) { - return false; - } + private boolean matches(final ByteOrderMark bom) { + // if (bom.length() != fbLength) { + // return false; + // } + // firstBytes may be bigger than the BOM bytes for (int i = 0; i < bom.length(); i++) { if (bom.get(i) != firstBytes[i]) { return false; @@ -250,36 +303,41 @@ public class BOMInputStream extends ProxyInputStream { return true; } - //---------------------------------------------------------------------------- - // Implementation of InputStream - //---------------------------------------------------------------------------- + // ---------------------------------------------------------------------------- + // Implementation of InputStream + // ---------------------------------------------------------------------------- /** - * Invokes the delegate's read() method, detecting and - * optionally skipping BOM. 
+ * Invokes the delegate's read() method, detecting and optionally skipping BOM. + * * @return the byte read (excluding BOM) or -1 if the end of stream - * @throws IOException if an I/O error occurs + * @throws IOException + * if an I/O error occurs */ @Override public int read() throws IOException { - int b = readFirstBytes(); - return (b >= 0) ? b : in.read(); + final int b = readFirstBytes(); + return b >= 0 ? b : in.read(); } /** - * Invokes the delegate's read(byte[], int, int) method, detecting - * and optionally skipping BOM. - * @param buf the buffer to read the bytes into - * @param off The start offset - * @param len The number of bytes to read (excluding BOM) + * Invokes the delegate's read(byte[], int, int) method, detecting and optionally skipping BOM. + * + * @param buf + * the buffer to read the bytes into + * @param off + * The start offset + * @param len + * The number of bytes to read (excluding BOM) * @return the number of bytes read or -1 if the end of stream - * @throws IOException if an I/O error occurs + * @throws IOException + * if an I/O error occurs */ @Override - public int read(byte[] buf, int off, int len) throws IOException { + public int read(final byte[] buf, int off, int len) throws IOException { int firstCount = 0; int b = 0; - while ((len > 0) && (b >= 0)) { + while (len > 0 && b >= 0) { b = readFirstBytes(); if (b >= 0) { buf[off++] = (byte) (b & 0xFF); @@ -287,37 +345,42 @@ public class BOMInputStream extends ProxyInputStream { firstCount++; } } - int secondCount = in.read(buf, off, len); - return (secondCount < 0) ? (firstCount > 0 ? firstCount : -1) : firstCount + secondCount; + final int secondCount = in.read(buf, off, len); + return secondCount < 0 ? firstCount > 0 ? firstCount : EOF : firstCount + secondCount; } /** - * Invokes the delegate's read(byte[]) method, detecting and - * optionally skipping BOM. 
- * @param buf the buffer to read the bytes into - * @return the number of bytes read (excluding BOM) - * or -1 if the end of stream - * @throws IOException if an I/O error occurs + * Invokes the delegate's read(byte[]) method, detecting and optionally skipping BOM. + * + * @param buf + * the buffer to read the bytes into + * @return the number of bytes read (excluding BOM) or -1 if the end of stream + * @throws IOException + * if an I/O error occurs */ @Override - public int read(byte[] buf) throws IOException { + public int read(final byte[] buf) throws IOException { return read(buf, 0, buf.length); } /** * Invokes the delegate's mark(int) method. - * @param readlimit read ahead limit + * + * @param readlimit + * read ahead limit */ @Override - public synchronized void mark(int readlimit) { + public synchronized void mark(final int readlimit) { markFbIndex = fbIndex; - markedAtStart = (firstBytes == null); + markedAtStart = firstBytes == null; in.mark(readlimit); } /** * Invokes the delegate's reset() method. - * @throws IOException if an I/O error occurs + * + * @throws IOException + * if an I/O error occurs */ @Override public synchronized void reset() throws IOException { @@ -330,17 +393,20 @@ public class BOMInputStream extends ProxyInputStream { } /** - * Invokes the delegate's skip(long) method, detecting - * and optionallyskipping BOM. - * @param n the number of bytes to skip + * Invokes the delegate's skip(long) method, detecting and optionally skipping BOM. 
+ * + * @param n + * the number of bytes to skip * @return the number of bytes to skipped or -1 if the end of stream - * @throws IOException if an I/O error occurs + * @throws IOException + * if an I/O error occurs */ @Override - public long skip(long n) throws IOException { - while ((n > 0) && (readFirstBytes() >= 0)) { - n--; + public long skip(final long n) throws IOException { + int skipped = 0; + while ((n > skipped) && (readFirstBytes() >= 0)) { + skipped++; } - return in.skip(n); + return in.skip(n - skipped) + skipped; } } diff --git a/epublib/src/main/java/me/ag2s/epublib/util/commons/io/ByteOrderMark.java b/epublib/src/main/java/me/ag2s/epublib/util/commons/io/ByteOrderMark.java index 5ec600f3b..369ec1978 100644 --- a/epublib/src/main/java/me/ag2s/epublib/util/commons/io/ByteOrderMark.java +++ b/epublib/src/main/java/me/ag2s/epublib/util/commons/io/ByteOrderMark.java @@ -18,15 +18,16 @@ package me.ag2s.epublib.util.commons.io; */ import java.io.Serializable; +import java.util.Locale; /** - * Byte Order Mark (BOM) representation - - * see {@link BOMInputStream}. + * Byte Order Mark (BOM) representation - see {@link BOMInputStream}. 
* * @see BOMInputStream - * @see Wikipedia - Byte Order Mark - * @version $Id: ByteOrderMark.java 1005099 2010-10-06 16:13:01Z niallp $ - * @since Commons IO 2.0 + * @see Wikipedia: Byte Order Mark + * @see W3C: Autodetection of Character Encodings + * (Non-Normative) + * @since 2.0 */ public class ByteOrderMark implements Serializable { @@ -34,11 +35,32 @@ public class ByteOrderMark implements Serializable { /** UTF-8 BOM */ public static final ByteOrderMark UTF_8 = new ByteOrderMark("UTF-8", 0xEF, 0xBB, 0xBF); - /** UTF-16BE BOM (Big Endian) */ + + /** UTF-16BE BOM (Big-Endian) */ public static final ByteOrderMark UTF_16BE = new ByteOrderMark("UTF-16BE", 0xFE, 0xFF); - /** UTF-16LE BOM (Little Endian) */ + + /** UTF-16LE BOM (Little-Endian) */ public static final ByteOrderMark UTF_16LE = new ByteOrderMark("UTF-16LE", 0xFF, 0xFE); + /** + * UTF-32BE BOM (Big-Endian) + * @since 2.2 + */ + public static final ByteOrderMark UTF_32BE = new ByteOrderMark("UTF-32BE", 0x00, 0x00, 0xFE, 0xFF); + + /** + * UTF-32LE BOM (Little-Endian) + * @since 2.2 + */ + public static final ByteOrderMark UTF_32LE = new ByteOrderMark("UTF-32LE", 0xFF, 0xFE, 0x00, 0x00); + + /** + * Unicode BOM character; external form depends on the encoding. + * @see Byte Order Mark (BOM) FAQ + * @since 2.5 + */ + public static final char UTF_BOM = '\uFEFF'; + private final String charsetName; private final int[] bytes; @@ -52,8 +74,8 @@ public class ByteOrderMark implements Serializable { * @throws IllegalArgumentException if the bytes are null or zero * length */ - public ByteOrderMark(String charsetName, int... bytes) { - if (charsetName == null || charsetName.length() == 0) { + public ByteOrderMark(final String charsetName, final int... 
bytes) { + if (charsetName == null || charsetName.isEmpty()) { throw new IllegalArgumentException("No charsetName specified"); } if (bytes == null || bytes.length == 0) { @@ -88,7 +110,7 @@ public class ByteOrderMark implements Serializable { * @param pos The position * @return The specified byte */ - public int get(int pos) { + public int get(final int pos) { return bytes[pos]; } @@ -98,7 +120,7 @@ public class ByteOrderMark implements Serializable { * @return a copy of the BOM's bytes */ public byte[] getBytes() { - byte[] copy = new byte[bytes.length]; + final byte[] copy = new byte[bytes.length]; for (int i = 0; i < bytes.length; i++) { copy[i] = (byte)bytes[i]; } @@ -113,11 +135,11 @@ public class ByteOrderMark implements Serializable { * false */ @Override - public boolean equals(Object obj) { + public boolean equals(final Object obj) { if (!(obj instanceof ByteOrderMark)) { return false; } - ByteOrderMark bom = (ByteOrderMark)obj; + final ByteOrderMark bom = (ByteOrderMark)obj; if (bytes.length != bom.length()) { return false; } @@ -133,12 +155,12 @@ public class ByteOrderMark implements Serializable { * Return the hashcode for this BOM. * * @return the hashcode for this BOM. 
- * @see Object#hashCode() + * @see java.lang.Object#hashCode() */ @Override public int hashCode() { int hashCode = getClass().hashCode(); - for (int b : bytes) { + for (final int b : bytes) { hashCode += b; } return hashCode; @@ -152,7 +174,7 @@ public class ByteOrderMark implements Serializable { @Override @SuppressWarnings("NullableProblems") public String toString() { - StringBuilder builder = new StringBuilder(); + final StringBuilder builder = new StringBuilder(); builder.append(getClass().getSimpleName()); builder.append('['); builder.append(charsetName); @@ -162,10 +184,10 @@ public class ByteOrderMark implements Serializable { builder.append(","); } builder.append("0x"); - builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase()); + builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase(Locale.ROOT)); } builder.append(']'); return builder.toString(); } -} +} \ No newline at end of file diff --git a/epublib/src/main/java/me/ag2s/epublib/util/commons/io/XmlStreamReader.java b/epublib/src/main/java/me/ag2s/epublib/util/commons/io/XmlStreamReader.java index bd0980932..3499354be 100644 --- a/epublib/src/main/java/me/ag2s/epublib/util/commons/io/XmlStreamReader.java +++ b/epublib/src/main/java/me/ag2s/epublib/util/commons/io/XmlStreamReader.java @@ -30,42 +30,48 @@ import java.net.HttpURLConnection; import java.net.URL; import java.net.URLConnection; import java.text.MessageFormat; +import java.util.Locale; import java.util.Objects; import java.util.regex.Matcher; import java.util.regex.Pattern; +import me.ag2s.epublib.util.IOUtil; + /** - * Character stream that handles all the necessary Voodo to figure out the + * Character stream that handles all the necessary Voodoo to figure out the * charset encoding of the XML document within the stream. *

* IMPORTANT: This class is not related in any way to the org.xml.sax.XMLReader. * This one IS a character stream. + *

*

* All this has to be done without consuming characters from the stream, if not * the XML parser will not recognized the document as a valid XML. This is not * 100% true, but it's close enough (UTF-8 BOM is not handled by all parsers * right now, XmlStreamReader handles it and things work in all parsers). + *

*

* The XmlStreamReader class handles the charset encoding of XML documents in * Files, raw streams and HTTP streams by offering a wide set of constructors. + *

*

* By default the charset encoding detection is lenient, the constructor with - * the lenient flag can be used for an script (following HTTP MIME and XML + * the lenient flag can be used for a script (following HTTP MIME and XML * specifications). All this is nicely explained by Mark Pilgrim in his blog, * Determining the character encoding of a feed. + *

*

* Originally developed for ROME under * Apache License 2.0. + *

* - * @author Alejandro Abdelnur - * @version $Id: XmlStreamReader.java 1052161 2010-12-23 03:12:09Z niallp $ - * @see "org.apache.commons.io.output.XmlStreamWriter" - * @since Commons IO 2.0 + * //@seerr XmlStreamWriter + * @since 2.0 */ public class XmlStreamReader extends Reader { - private static final int BUFFER_SIZE = 4096; + private static final int BUFFER_SIZE = IOUtil.DEFAULT_BUFFER_SIZE; private static final String UTF_8 = "UTF-8"; @@ -75,22 +81,35 @@ public class XmlStreamReader extends Reader { private static final String UTF_16LE = "UTF-16LE"; + private static final String UTF_32BE = "UTF-32BE"; + + private static final String UTF_32LE = "UTF-32LE"; + private static final String UTF_16 = "UTF-16"; + private static final String UTF_32 = "UTF-32"; + private static final String EBCDIC = "CP1047"; private static final ByteOrderMark[] BOMS = new ByteOrderMark[] { - ByteOrderMark.UTF_8, - ByteOrderMark.UTF_16BE, - ByteOrderMark.UTF_16LE - }; - private static final ByteOrderMark[] XML_GUESS_BYTES = new ByteOrderMark[] { - new ByteOrderMark(UTF_8, 0x3C, 0x3F, 0x78, 0x6D), - new ByteOrderMark(UTF_16BE, 0x00, 0x3C, 0x00, 0x3F), - new ByteOrderMark(UTF_16LE, 0x3C, 0x00, 0x3F, 0x00), - new ByteOrderMark(EBCDIC, 0x4C, 0x6F, 0xA7, 0x94) + ByteOrderMark.UTF_8, + ByteOrderMark.UTF_16BE, + ByteOrderMark.UTF_16LE, + ByteOrderMark.UTF_32BE, + ByteOrderMark.UTF_32LE }; + // UTF_16LE and UTF_32LE have the same two starting BOM bytes. 
+ private static final ByteOrderMark[] XML_GUESS_BYTES = new ByteOrderMark[] { + new ByteOrderMark(UTF_8, 0x3C, 0x3F, 0x78, 0x6D), + new ByteOrderMark(UTF_16BE, 0x00, 0x3C, 0x00, 0x3F), + new ByteOrderMark(UTF_16LE, 0x3C, 0x00, 0x3F, 0x00), + new ByteOrderMark(UTF_32BE, 0x00, 0x00, 0x00, 0x3C, + 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x6D), + new ByteOrderMark(UTF_32LE, 0x3C, 0x00, 0x00, 0x00, + 0x3F, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00), + new ByteOrderMark(EBCDIC, 0x4C, 0x6F, 0xA7, 0x94) + }; private final Reader reader; @@ -106,7 +125,6 @@ public class XmlStreamReader extends Reader { * * @return the default encoding to use. */ - @SuppressWarnings("unused") public String getDefaultEncoding() { return defaultEncoding; } @@ -124,8 +142,8 @@ public class XmlStreamReader extends Reader { * @throws IOException thrown if there is a problem reading the file. */ @SuppressWarnings("unused") - public XmlStreamReader(File file) throws IOException { - this(new FileInputStream(file)); + public XmlStreamReader(final File file) throws IOException { + this(new FileInputStream(Objects.requireNonNull(file))); } /** @@ -136,11 +154,11 @@ public class XmlStreamReader extends Reader { * It does a lenient charset encoding detection, check the constructor with * the lenient parameter for details. * - * @param is InputStream to create a Reader from. + * @param inputStream InputStream to create a Reader from. * @throws IOException thrown if there is a problem reading the stream. */ - public XmlStreamReader(InputStream is) throws IOException { - this(is, true); + public XmlStreamReader(final InputStream inputStream) throws IOException { + this(inputStream, true); } /** @@ -163,15 +181,15 @@ public class XmlStreamReader extends Reader { * If lenient detection is indicated an XmlStreamReaderException is never * thrown. * - * @param is InputStream to create a Reader from. + * @param inputStream InputStream to create a Reader from. 
* @param lenient indicates if the charset encoding detection should be * relaxed. * @throws IOException thrown if there is a problem reading the stream. * @throws XmlStreamReaderException thrown if the charset encoding could not * be determined according to the specs. */ - public XmlStreamReader(InputStream is, boolean lenient) throws IOException { - this(is, lenient, null); + public XmlStreamReader(final InputStream inputStream, final boolean lenient) throws IOException { + this(inputStream, lenient, null); } /** @@ -194,7 +212,7 @@ public class XmlStreamReader extends Reader { * If lenient detection is indicated an XmlStreamReaderException is never * thrown. * - * @param is InputStream to create a Reader from. + * @param inputStream InputStream to create a Reader from. * @param lenient indicates if the charset encoding detection should be * relaxed. * @param defaultEncoding The default encoding @@ -202,10 +220,12 @@ public class XmlStreamReader extends Reader { * @throws XmlStreamReaderException thrown if the charset encoding could not * be determined according to the specs. */ - public XmlStreamReader(InputStream is, boolean lenient, String defaultEncoding) throws IOException { + public XmlStreamReader(final InputStream inputStream, final boolean lenient, final String defaultEncoding) + throws IOException { + Objects.requireNonNull(inputStream, "inputStream"); this.defaultEncoding = defaultEncoding; - BOMInputStream bom = new BOMInputStream(new BufferedInputStream(is, BUFFER_SIZE), false, BOMS); - BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES); + final BOMInputStream bom = new BOMInputStream(new BufferedInputStream(inputStream, BUFFER_SIZE), false, BOMS); + final BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES); this.encoding = doRawStream(bom, pis, lenient); this.reader = new InputStreamReader(pis, encoding); } @@ -228,8 +248,8 @@ public class XmlStreamReader extends Reader { * the URL. 
*/ @SuppressWarnings("unused") - public XmlStreamReader(URL url) throws IOException { - this(url.openConnection(), null); + public XmlStreamReader(final URL url) throws IOException { + this(Objects.requireNonNull(url, "url").openConnection(), null); } /** @@ -251,24 +271,24 @@ public class XmlStreamReader extends Reader { * @throws IOException thrown if there is a problem reading the stream of * the URLConnection. */ - public XmlStreamReader(URLConnection conn, String defaultEncoding) throws IOException { + public XmlStreamReader(final URLConnection conn, final String defaultEncoding) throws IOException { + Objects.requireNonNull(conn, "conm"); this.defaultEncoding = defaultEncoding; - @SuppressWarnings("unused") - boolean lenient = true; - String contentType = conn.getContentType(); - InputStream is = conn.getInputStream(); - BOMInputStream bom = new BOMInputStream(new BufferedInputStream(is, BUFFER_SIZE), false, BOMS); - BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES); + final boolean lenient = true; + final String contentType = conn.getContentType(); + final InputStream inputStream = conn.getInputStream(); + final BOMInputStream bom = new BOMInputStream(new BufferedInputStream(inputStream, BUFFER_SIZE), false, BOMS); + final BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES); if (conn instanceof HttpURLConnection || contentType != null) { - this.encoding = doHttpStream(bom, pis, contentType, true); + this.encoding = processHttpStream(bom, pis, contentType, lenient); } else { - this.encoding = doRawStream(bom, pis, true); + this.encoding = doRawStream(bom, pis, lenient); } this.reader = new InputStreamReader(pis, encoding); } /** - * Creates a Reader using an InputStream an the associated content-type + * Creates a Reader using an InputStream and the associated content-type * header. *

* First it checks if the stream has BOM. If there is not BOM checks the @@ -279,18 +299,18 @@ public class XmlStreamReader extends Reader { * It does a lenient charset encoding detection, check the constructor with * the lenient parameter for details. * - * @param is InputStream to create the reader from. + * @param inputStream InputStream to create the reader from. * @param httpContentType content-type header to use for the resolution of * the charset encoding. * @throws IOException thrown if there is a problem reading the file. */ - public XmlStreamReader(InputStream is, String httpContentType) + public XmlStreamReader(final InputStream inputStream, final String httpContentType) throws IOException { - this(is, httpContentType, true); + this(inputStream, httpContentType, true); } /** - * Creates a Reader using an InputStream an the associated content-type + * Creates a Reader using an InputStream and the associated content-type * header. This constructor is lenient regarding the encoding detection. *

* First it checks if the stream has BOM. If there is not BOM checks the @@ -313,7 +333,7 @@ public class XmlStreamReader extends Reader { * If lenient detection is indicated an XmlStreamReaderException is never * thrown. * - * @param is InputStream to create the reader from. + * @param inputStream InputStream to create the reader from. * @param httpContentType content-type header to use for the resolution of * the charset encoding. * @param lenient indicates if the charset encoding detection should be @@ -323,17 +343,18 @@ public class XmlStreamReader extends Reader { * @throws XmlStreamReaderException thrown if the charset encoding could not * be determined according to the specs. */ - public XmlStreamReader(InputStream is, String httpContentType, - boolean lenient, String defaultEncoding) throws IOException { + public XmlStreamReader(final InputStream inputStream, final String httpContentType, + final boolean lenient, final String defaultEncoding) throws IOException { + Objects.requireNonNull(inputStream, "inputStream"); this.defaultEncoding = defaultEncoding; - BOMInputStream bom = new BOMInputStream(new BufferedInputStream(is, BUFFER_SIZE), false, BOMS); - BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES); - this.encoding = doHttpStream(bom, pis, httpContentType, lenient); + final BOMInputStream bom = new BOMInputStream(new BufferedInputStream(inputStream, BUFFER_SIZE), false, BOMS); + final BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES); + this.encoding = processHttpStream(bom, pis, httpContentType, lenient); this.reader = new InputStreamReader(pis, encoding); } /** - * Creates a Reader using an InputStream an the associated content-type + * Creates a Reader using an InputStream and the associated content-type * header. This constructor is lenient regarding the encoding detection. *

* First it checks if the stream has BOM. If there is not BOM checks the @@ -356,7 +377,7 @@ public class XmlStreamReader extends Reader { * If lenient detection is indicated an XmlStreamReaderException is never * thrown. * - * @param is InputStream to create the reader from. + * @param inputStream InputStream to create the reader from. * @param httpContentType content-type header to use for the resolution of * the charset encoding. * @param lenient indicates if the charset encoding detection should be @@ -365,9 +386,9 @@ public class XmlStreamReader extends Reader { * @throws XmlStreamReaderException thrown if the charset encoding could not * be determined according to the specs. */ - public XmlStreamReader(InputStream is, String httpContentType, - boolean lenient) throws IOException { - this(is, httpContentType, lenient, null); + public XmlStreamReader(final InputStream inputStream, final String httpContentType, + final boolean lenient) throws IOException { + this(inputStream, httpContentType, lenient, null); } /** @@ -388,7 +409,7 @@ public class XmlStreamReader extends Reader { * @throws IOException if an I/O error occurs */ @Override - public int read(char[] buf, int offset, int len) throws IOException { + public int read(final char[] buf, final int offset, final int len) throws IOException { return reader.read(buf, offset, len); } @@ -412,19 +433,18 @@ public class XmlStreamReader extends Reader { * @return the encoding to be used * @throws IOException thrown if there is a problem reading the stream. 
*/ - private String doRawStream(BOMInputStream bom, BOMInputStream pis, boolean lenient) + private String doRawStream(final BOMInputStream bom, final BOMInputStream pis, final boolean lenient) throws IOException { - String bomEnc = bom.getBOMCharsetName(); - String xmlGuessEnc = pis.getBOMCharsetName(); - String xmlEnc = getXmlProlog(pis, xmlGuessEnc); + final String bomEnc = bom.getBOMCharsetName(); + final String xmlGuessEnc = pis.getBOMCharsetName(); + final String xmlEnc = getXmlProlog(pis, xmlGuessEnc); try { return calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc); - } catch (XmlStreamReaderException ex) { + } catch (final XmlStreamReaderException ex) { if (lenient) { return doLenientDetection(null, ex); - } else { - throw ex; } + throw ex; } } @@ -439,20 +459,18 @@ public class XmlStreamReader extends Reader { * @return the encoding to be used * @throws IOException thrown if there is a problem reading the stream. */ - private String doHttpStream(BOMInputStream bom, BOMInputStream pis, String httpContentType, - boolean lenient) throws IOException { - String bomEnc = bom.getBOMCharsetName(); - String xmlGuessEnc = pis.getBOMCharsetName(); - String xmlEnc = getXmlProlog(pis, xmlGuessEnc); + private String processHttpStream(final BOMInputStream bom, final BOMInputStream pis, final String httpContentType, + final boolean lenient) throws IOException { + final String bomEnc = bom.getBOMCharsetName(); + final String xmlGuessEnc = pis.getBOMCharsetName(); + final String xmlEnc = getXmlProlog(pis, xmlGuessEnc); try { - return calculateHttpEncoding(httpContentType, bomEnc, - xmlGuessEnc, xmlEnc, lenient); - } catch (XmlStreamReaderException ex) { + return calculateHttpEncoding(httpContentType, bomEnc, xmlGuessEnc, xmlEnc, lenient); + } catch (final XmlStreamReaderException ex) { if (lenient) { return doLenientDetection(httpContentType, ex); - } else { - throw ex; } + throw ex; } } @@ -466,14 +484,14 @@ public class XmlStreamReader extends Reader { * @throws IOException 
thrown if there is a problem reading the stream. */ private String doLenientDetection(String httpContentType, - XmlStreamReaderException ex) throws IOException { + XmlStreamReaderException ex) throws IOException { if (httpContentType != null && httpContentType.startsWith("text/html")) { httpContentType = httpContentType.substring("text/html".length()); httpContentType = "text/xml" + httpContentType; try { return calculateHttpEncoding(httpContentType, ex.getBomEncoding(), ex.getXmlGuessEncoding(), ex.getXmlEncoding(), true); - } catch (XmlStreamReaderException ex2) { + } catch (final XmlStreamReaderException ex2) { ex = ex2; } } @@ -482,7 +500,7 @@ public class XmlStreamReader extends Reader { encoding = ex.getContentTypeEncoding(); } if (encoding == null) { - encoding = (defaultEncoding == null) ? UTF_8 : defaultEncoding; + encoding = defaultEncoding == null ? UTF_8 : defaultEncoding; } return encoding; } @@ -496,16 +514,16 @@ public class XmlStreamReader extends Reader { * @return the raw encoding * @throws IOException thrown if there is a problem reading the stream. */ - String calculateRawEncoding(String bomEnc, String xmlGuessEnc, - String xmlEnc) throws IOException { + String calculateRawEncoding(final String bomEnc, final String xmlGuessEnc, + final String xmlEnc) throws IOException { // BOM is Null if (bomEnc == null) { if (xmlGuessEnc == null || xmlEnc == null) { - return (defaultEncoding == null ? UTF_8 : defaultEncoding); + return defaultEncoding == null ? 
UTF_8 : defaultEncoding; } if (xmlEnc.equals(UTF_16) && - (xmlGuessEnc.equals(UTF_16BE) || xmlGuessEnc.equals(UTF_16LE))) { + (xmlGuessEnc.equals(UTF_16BE) || xmlGuessEnc.equals(UTF_16LE))) { return xmlGuessEnc; } return xmlEnc; @@ -514,11 +532,11 @@ public class XmlStreamReader extends Reader { // BOM is UTF-8 if (bomEnc.equals(UTF_8)) { if (xmlGuessEnc != null && !xmlGuessEnc.equals(UTF_8)) { - String msg = MessageFormat.format(RAW_EX_1, bomEnc, xmlGuessEnc, xmlEnc); + final String msg = MessageFormat.format(RAW_EX_1, bomEnc, xmlGuessEnc, xmlEnc); throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc); } if (xmlEnc != null && !xmlEnc.equals(UTF_8)) { - String msg = MessageFormat.format(RAW_EX_1, bomEnc, xmlGuessEnc, xmlEnc); + final String msg = MessageFormat.format(RAW_EX_1, bomEnc, xmlGuessEnc, xmlEnc); throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc); } return bomEnc; @@ -527,18 +545,31 @@ public class XmlStreamReader extends Reader { // BOM is UTF-16BE or UTF-16LE if (bomEnc.equals(UTF_16BE) || bomEnc.equals(UTF_16LE)) { if (xmlGuessEnc != null && !xmlGuessEnc.equals(bomEnc)) { - String msg = MessageFormat.format(RAW_EX_1, bomEnc, xmlGuessEnc, xmlEnc); + final String msg = MessageFormat.format(RAW_EX_1, bomEnc, xmlGuessEnc, xmlEnc); throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc); } if (xmlEnc != null && !xmlEnc.equals(UTF_16) && !xmlEnc.equals(bomEnc)) { - String msg = MessageFormat.format(RAW_EX_1, bomEnc, xmlGuessEnc, xmlEnc); + final String msg = MessageFormat.format(RAW_EX_1, bomEnc, xmlGuessEnc, xmlEnc); + throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc); + } + return bomEnc; + } + + // BOM is UTF-32BE or UTF-32LE + if (bomEnc.equals(UTF_32BE) || bomEnc.equals(UTF_32LE)) { + if (xmlGuessEnc != null && !xmlGuessEnc.equals(bomEnc)) { + final String msg = MessageFormat.format(RAW_EX_1, bomEnc, xmlGuessEnc, xmlEnc); + throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, 
xmlEnc); + } + if (xmlEnc != null && !xmlEnc.equals(UTF_32) && !xmlEnc.equals(bomEnc)) { + final String msg = MessageFormat.format(RAW_EX_1, bomEnc, xmlGuessEnc, xmlEnc); throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc); } return bomEnc; } // BOM is something else - String msg = MessageFormat.format(RAW_EX_2, bomEnc, xmlGuessEnc, xmlEnc); + final String msg = MessageFormat.format(RAW_EX_2, bomEnc, xmlGuessEnc, xmlEnc); throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc); } @@ -555,9 +586,9 @@ public class XmlStreamReader extends Reader { * @return the HTTP encoding * @throws IOException thrown if there is a problem reading the stream. */ - String calculateHttpEncoding(String httpContentType, - String bomEnc, String xmlGuessEnc, String xmlEnc, - boolean lenient) throws IOException { + String calculateHttpEncoding(final String httpContentType, + final String bomEnc, final String xmlGuessEnc, final String xmlEnc, + final boolean lenient) throws IOException { // Lenient and has XML encoding if (lenient && xmlEnc != null) { @@ -565,14 +596,14 @@ public class XmlStreamReader extends Reader { } // Determine mime/encoding content types from HTTP Content Type - String cTMime = getContentTypeMime(httpContentType); - String cTEnc = getContentTypeEncoding(httpContentType); - boolean appXml = isAppXml(cTMime); - boolean textXml = isTextXml(cTMime); + final String cTMime = getContentTypeMime(httpContentType); + final String cTEnc = getContentTypeEncoding(httpContentType); + final boolean appXml = isAppXml(cTMime); + final boolean textXml = isTextXml(cTMime); // Mime type NOT "application/xml" or "text/xml" if (!appXml && !textXml) { - String msg = MessageFormat.format(HTTP_EX_3, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc); + final String msg = MessageFormat.format(HTTP_EX_3, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc); throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc); } @@ -580,15 +611,14 @@ public class 
XmlStreamReader extends Reader { if (cTEnc == null) { if (appXml) { return calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc); - } else { - return (defaultEncoding == null) ? US_ASCII : defaultEncoding; } + return defaultEncoding == null ? US_ASCII : defaultEncoding; } // UTF-16BE or UTF-16LE content type encoding if (cTEnc.equals(UTF_16BE) || cTEnc.equals(UTF_16LE)) { if (bomEnc != null) { - String msg = MessageFormat.format(HTTP_EX_1, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc); + final String msg = MessageFormat.format(HTTP_EX_1, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc); throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc); } return cTEnc; @@ -599,7 +629,25 @@ public class XmlStreamReader extends Reader { if (bomEnc != null && bomEnc.startsWith(UTF_16)) { return bomEnc; } - String msg = MessageFormat.format(HTTP_EX_2, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc); + final String msg = MessageFormat.format(HTTP_EX_2, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc); + throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc); + } + + // UTF-32BE or UTF-32LE content type encoding + if (cTEnc.equals(UTF_32BE) || cTEnc.equals(UTF_32LE)) { + if (bomEnc != null) { + final String msg = MessageFormat.format(HTTP_EX_1, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc); + throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc); + } + return cTEnc; + } + + // UTF-32 content type encoding + if (cTEnc.equals(UTF_32)) { + if (bomEnc != null && bomEnc.startsWith(UTF_32)) { + return bomEnc; + } + final String msg = MessageFormat.format(HTTP_EX_2, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc); throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc); } @@ -612,10 +660,10 @@ public class XmlStreamReader extends Reader { * @param httpContentType the HTTP content type * @return The mime content type */ - static String getContentTypeMime(String httpContentType) { + static String 
getContentTypeMime(final String httpContentType) { String mime = null; if (httpContentType != null) { - int i = httpContentType.indexOf(";"); + final int i = httpContentType.indexOf(";"); if (i >= 0) { mime = httpContentType.substring(0, i); } else { @@ -634,22 +682,25 @@ public class XmlStreamReader extends Reader { * httpContentType is NULL. * * @param httpContentType the HTTP content type - * @return The content type encoding + * @return The content type encoding (upcased) */ - static String getContentTypeEncoding(String httpContentType) { + static String getContentTypeEncoding(final String httpContentType) { String encoding = null; if (httpContentType != null) { - int i = httpContentType.indexOf(";"); + final int i = httpContentType.indexOf(";"); if (i > -1) { - String postMime = httpContentType.substring(i + 1); - Matcher m = CHARSET_PATTERN.matcher(postMime); - encoding = (m.find()) ? m.group(1) : null; - encoding = (encoding != null) ? encoding.toUpperCase() : null; + final String postMime = httpContentType.substring(i + 1); + final Matcher m = CHARSET_PATTERN.matcher(postMime); + encoding = m.find() ? m.group(1) : null; + encoding = encoding != null ? encoding.toUpperCase(Locale.ROOT) : null; } } return encoding; } + /** + * Pattern capturing the encoding of the "xml" processing instruction. + */ public static final Pattern ENCODING_PATTERN = Pattern.compile( "<\\?xml.*encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))", Pattern.MULTILINE); @@ -657,52 +708,50 @@ public class XmlStreamReader extends Reader { /** * Returns the encoding declared in the , NULL if none. * - * @param is InputStream to create the reader from. + * @param inputStream InputStream to create the reader from. * @param guessedEnc guessed encoding * @return the encoding declared in the * @throws IOException thrown if there is a problem reading the stream. 
*/ - private static String getXmlProlog(InputStream is, String guessedEnc) + private static String getXmlProlog(final InputStream inputStream, final String guessedEnc) throws IOException { String encoding = null; if (guessedEnc != null) { - byte[] bytes = new byte[BUFFER_SIZE]; - is.mark(BUFFER_SIZE); + final byte[] bytes = new byte[BUFFER_SIZE]; + inputStream.mark(BUFFER_SIZE); int offset = 0; int max = BUFFER_SIZE; - int c = is.read(bytes, offset, max); + int c = inputStream.read(bytes, offset, max); int firstGT = -1; - String xmlProlog = null; + String xmlProlog = ""; // avoid possible NPE warning (cannot happen; this just silences the warning) while (c != -1 && firstGT == -1 && offset < BUFFER_SIZE) { offset += c; max -= c; - c = is.read(bytes, offset, max); + c = inputStream.read(bytes, offset, max); xmlProlog = new String(bytes, 0, offset, guessedEnc); firstGT = xmlProlog.indexOf('>'); } if (firstGT == -1) { if (c == -1) { throw new IOException("Unexpected end of XML stream"); - } else { - throw new IOException( - "XML prolog or ROOT element not found on first " - + offset + " bytes"); } + throw new IOException( + "XML prolog or ROOT element not found on first " + + offset + " bytes"); } - int bytesRead = offset; + final int bytesRead = offset; if (bytesRead > 0) { - is.reset(); - BufferedReader bReader = new BufferedReader(new StringReader( + inputStream.reset(); + final BufferedReader bReader = new BufferedReader(new StringReader( xmlProlog.substring(0, firstGT + 1))); - StringBuilder prolog = new StringBuilder(); - String line = bReader.readLine(); - while (line != null) { + final StringBuffer prolog = new StringBuffer(); + String line; + while ((line = bReader.readLine()) != null) { prolog.append(line); - line = bReader.readLine(); } - Matcher m = ENCODING_PATTERN.matcher(prolog); + final Matcher m = ENCODING_PATTERN.matcher(prolog); if (m.find()) { - encoding = Objects.requireNonNull(m.group(1)).toUpperCase(); + encoding = 
Objects.requireNonNull(m.group(1)).toUpperCase(Locale.ROOT); encoding = encoding.substring(1, encoding.length() - 1); } } @@ -712,46 +761,46 @@ public class XmlStreamReader extends Reader { /** * Indicates if the MIME type belongs to the APPLICATION XML family. - * + * * @param mime The mime type * @return true if the mime type belongs to the APPLICATION XML family, * otherwise false */ - static boolean isAppXml(String mime) { + static boolean isAppXml(final String mime) { return mime != null && - (mime.equals("application/xml") || - mime.equals("application/xml-dtd") || - mime.equals("application/xml-external-parsed-entity") || - (mime.startsWith("application/") && mime.endsWith("+xml"))); + (mime.equals("application/xml") || + mime.equals("application/xml-dtd") || + mime.equals("application/xml-external-parsed-entity") || + mime.startsWith("application/") && mime.endsWith("+xml")); } /** * Indicates if the MIME type belongs to the TEXT XML family. - * + * * @param mime The mime type * @return true if the mime type belongs to the TEXT XML family, * otherwise false */ - static boolean isTextXml(String mime) { + static boolean isTextXml(final String mime) { return mime != null && - (mime.equals("text/xml") || - mime.equals("text/xml-external-parsed-entity") || - (mime.startsWith("text/") && mime.endsWith("+xml"))); + (mime.equals("text/xml") || + mime.equals("text/xml-external-parsed-entity") || + mime.startsWith("text/") && mime.endsWith("+xml")); } private static final String RAW_EX_1 = - "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] encoding mismatch"; + "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] encoding mismatch"; private static final String RAW_EX_2 = - "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] unknown BOM"; + "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] unknown BOM"; private static final String HTTP_EX_1 = - "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML 
prolog [{4}], BOM must be NULL"; + "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], BOM must be NULL"; private static final String HTTP_EX_2 = - "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], encoding mismatch"; + "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], encoding mismatch"; private static final String HTTP_EX_3 = - "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], Invalid MIME"; + "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], Invalid MIME"; -} +} \ No newline at end of file diff --git a/epublib/src/main/java/me/ag2s/epublib/util/commons/io/XmlStreamReaderException.java b/epublib/src/main/java/me/ag2s/epublib/util/commons/io/XmlStreamReaderException.java index 0f97df60c..a903279d4 100644 --- a/epublib/src/main/java/me/ag2s/epublib/util/commons/io/XmlStreamReaderException.java +++ b/epublib/src/main/java/me/ag2s/epublib/util/commons/io/XmlStreamReaderException.java @@ -28,10 +28,9 @@ import java.io.IOException; * do an alternate processing with the stream. Note that the original * InputStream given to the XmlStreamReader cannot be used as that one has been * already read. + *

* - * @author Alejandro Abdelnur - * @version $Id: XmlStreamReaderException.java 1004112 2010-10-04 04:48:25Z niallp $ - * @since Commons IO 2.0 + * @since 2.0 */ public class XmlStreamReaderException extends IOException { @@ -52,14 +51,15 @@ public class XmlStreamReaderException extends IOException { * determined. *

* Instances of this exception are thrown by the XmlStreamReader. + *

* * @param msg message describing the reason for the exception. * @param bomEnc BOM encoding. * @param xmlGuessEnc XML guess encoding. * @param xmlEnc XML prolog encoding. */ - public XmlStreamReaderException(String msg, String bomEnc, - String xmlGuessEnc, String xmlEnc) { + public XmlStreamReaderException(final String msg, final String bomEnc, + final String xmlGuessEnc, final String xmlEnc) { this(msg, null, null, bomEnc, xmlGuessEnc, xmlEnc); } @@ -68,6 +68,7 @@ public class XmlStreamReaderException extends IOException { * determined. *

* Instances of this exception are thrown by the XmlStreamReader. + *

* * @param msg message describing the reason for the exception. * @param ctMime MIME type in the content-type. @@ -76,8 +77,8 @@ public class XmlStreamReaderException extends IOException { * @param xmlGuessEnc XML guess encoding. * @param xmlEnc XML prolog encoding. */ - public XmlStreamReaderException(String msg, String ctMime, String ctEnc, - String bomEnc, String xmlGuessEnc, String xmlEnc) { + public XmlStreamReaderException(final String msg, final String ctMime, final String ctEnc, + final String bomEnc, final String xmlGuessEnc, final String xmlEnc) { super(msg); contentTypeMime = ctMime; contentTypeEncoding = ctEnc; @@ -120,7 +121,6 @@ public class XmlStreamReaderException extends IOException { * @return the MIME type in the content-type, null if there was not * content-type or the encoding detection did not involve HTTP. */ - @SuppressWarnings("unused") public String getContentTypeMime() { return contentTypeMime; }