diff --git a/pom.xml b/pom.xml index 1c4f925..ecfc3d7 100644 --- a/pom.xml +++ b/pom.xml @@ -49,32 +49,21 @@ UTF-8 UTF-8 - 2.11.5 - 1.7.36 - - 5.3.0 - 1.16.0 - 2.18.0 - 1.27.1 - 1.9.4 - - 2.0.32 - + 1.84 + 1.28.0 + 2.22.0 2.3.1 2.3.9 - + 2.0.36 + 5.3.0 + 1.16.0 + 2.0.17 + 2.11.6 - 5.11.3 3.0 2.13.1 - 2.24.2 - - - levigo_filetype-analyzer - levigo - https://sonarcloud.io - - 1.79 + 5.11.4 + 2.26.0 @@ -82,7 +71,7 @@ org.apache.maven.plugins maven-compiler-plugin - 3.12.1 + 3.15.0 1.8 1.8 @@ -92,7 +81,7 @@ org.jacoco jacoco-maven-plugin - 0.8.11 + 0.8.14 pre-unit-test @@ -116,7 +105,7 @@ org.apache.maven.plugins maven-surefire-plugin - 3.2.3 + 3.5.5 ${jacocoAgentArgLine} @@ -132,12 +121,12 @@ org.apache.maven.plugins maven-enforcer-plugin - 3.5.0 + 3.6.2 org.codehaus.mojo extra-enforcer-rules - 1.8.0 + 1.12.0 @@ -181,7 +170,7 @@ org.apache.maven.plugins maven-release-plugin - 3.1.1 + 3.3.0 git @@ -199,38 +188,74 @@ org.apache.maven.plugins maven-jar-plugin - 3.3.0 + 3.5.0 org.apache.maven.plugins maven-clean-plugin - 3.3.2 + 3.5.0 org.apache.maven.plugins maven-install-plugin - 3.1.1 + 3.1.4 org.apache.maven.plugins maven-site-plugin - 3.12.1 + 3.21.0 org.apache.maven.plugins maven-resources-plugin - 3.3.1 + 3.5.0 org.apache.maven.plugins maven-deploy-plugin - 3.1.2 + 3.1.4 org.apache.maven.plugins maven-javadoc-plugin - 3.6.3 + 3.12.0 + + org.codehaus.mojo + versions-maven-plugin + 2.21.0 + + + + + + regex + (.+-SNAPSHOT|.+-M\d|.+-(rc|RC).*) + + + regex + .+-(alpha|beta).* + + + regex + .+does-not-exist.* + + + regex + .*-nx[0-9]+ + + + regex + .*-atlassian.* + + + regex + .+-jdk5.* + + + + + @@ -293,7 +318,7 @@ org.apache.logging.log4j - log4j-slf4j-impl + log4j-slf4j2-impl ${log4j.version} test @@ -382,25 +407,8 @@ ${commons-compress.version} compile - - org.apache.opennlp - opennlp-tools - ${opennlp-tools.version} - compile - - - - maven2.releases.levigo.de - https://levigo.de/maven2/content/repositories/levigo-releases/ - - - maven2.snapshots.levigo.de - 
https://levigo.de/maven2/content/repositories/levigo-snapshots/ - - - @@ -415,14 +423,14 @@ - org.sonatype.plugins - nexus-staging-maven-plugin - 1.6.13 + org.sonatype.central + central-publishing-maven-plugin + 0.10.0 true - ossrh - https://s01.oss.sonatype.org/ - true + central + true + uploaded @@ -430,7 +438,7 @@ org.apache.maven.plugins maven-source-plugin - 3.3.0 + 3.4.0 attach-sources @@ -462,7 +470,7 @@ org.apache.maven.plugins maven-gpg-plugin - 3.0.1 + 3.1.0 sign-artifacts diff --git a/src/main/java/org/jadice/filetype/Analyzer.java b/src/main/java/org/jadice/filetype/Analyzer.java index d4bdf46..2e088d2 100644 --- a/src/main/java/org/jadice/filetype/Analyzer.java +++ b/src/main/java/org/jadice/filetype/Analyzer.java @@ -15,14 +15,13 @@ import javax.xml.bind.ValidationEventHandler; import javax.xml.bind.ValidationEventLocator; -import opennlp.tools.util.model.UncloseableInputStream; import org.apache.commons.io.FilenameUtils; import org.jadice.filetype.database.Database; import org.jadice.filetype.database.DescriptionAction; import org.jadice.filetype.database.Type; -import org.jadice.filetype.io.MemoryInputStream; import org.jadice.filetype.io.RandomAccessFileInputStream; import org.jadice.filetype.io.SeekableInputStream; +import org.jadice.filetype.io.UncloseableMemoryInputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -228,17 +227,21 @@ public Map analyze(final InputStream sis, final AnalysisListener public Map analyze(final InputStream is, final AnalysisListener listener, final String fileName) throws IOException { Map result = new HashMap<>(); - - - // POI (3.1-Final) closes the stream during analyszs of office files - use an uncloseable stream wrapper - final UncloseableInputStream uis = new UncloseableInputStream(is); - final UncloseableSeekableInputStreamWrapper usis = new UncloseableSeekableInputStreamWrapper(new MemoryInputStream(uis)); - usis.lockClose(); // and don't unlock later as POI attempts to close asynchronously! 
- + final AnalysisListener effectiveListener = listener != null ? listener : DEFAULT_LISTENER; + // POI may close streams during analysis; shield callers by using an uncloseable, seekable wrapper. + // If the input is already seekable, avoid buffering the full stream in memory. + final SeekableInputStream baseStream; + if (is instanceof SeekableInputStream) { + baseStream = (SeekableInputStream) is; + } else { + baseStream = new UncloseableMemoryInputStream(is); + } + final UncloseableSeekableInputStreamWrapper usis = new UncloseableSeekableInputStreamWrapper(baseStream); + usis.lockClose(); // do not unlock later as POI may attempt to close asynchronously final String sanitizedFileName = fileName != null ? fileName.replaceAll("[:\\\\/*?|<>]", "_") : null; String extension = FilenameUtils.getExtension(sanitizedFileName); - Context ctx = new Context(usis, result, listener, locale, extension); + Context ctx = new Context(usis, result, effectiveListener, locale, extension); database.analyze(ctx); @@ -267,12 +270,13 @@ public Map analyze(final File file, final AnalysisListener liste SeekableInputStream sis = new RandomAccessFileInputStream(file); try { String fileName = file.getName(); - return analyze(sis, null, fileName); + return analyze(sis, listener, fileName); } finally { try { sis.close(); } catch (IOException e) { - listener.error(this, "Exception closing RandomAccessFileInputStream", e); + final AnalysisListener effectiveListener = listener != null ? 
listener : DEFAULT_LISTENER; + effectiveListener.error(this, "Exception closing RandomAccessFileInputStream", e); } } } @@ -289,13 +293,13 @@ public Map analyze(final SeekableInputStream sis) throws IOExcep } - public Map analyzeWithFilename(final SeekableInputStream sis,final String fileName) throws IOException { + public Map analyzeWithFilename(final SeekableInputStream sis, final String fileName) throws IOException { return analyze(sis, DEFAULT_LISTENER, fileName); } /** * Analyze the stream supplied via an {@link InputStream}.
- * Caveat: the data will be buffered in memory. If you don't like this, supply a + * Caveat: non-seekable streams may be buffered in memory. If you don't like this, supply a * {@link SeekableInputStream} implementation or a {@link File} instead. * * @param is diff --git a/src/main/java/org/jadice/filetype/io/UncloseableMemoryInputStream.java b/src/main/java/org/jadice/filetype/io/UncloseableMemoryInputStream.java new file mode 100644 index 0000000..fb800ed --- /dev/null +++ b/src/main/java/org/jadice/filetype/io/UncloseableMemoryInputStream.java @@ -0,0 +1,22 @@ +package org.jadice.filetype.io; + +import java.io.IOException; +import java.io.InputStream; + + +/** + * A {@link MemoryInputStream} which cannot be closed. + */ +public class UncloseableMemoryInputStream extends MemoryInputStream { + public UncloseableMemoryInputStream(final InputStream is) throws IOException { + super(is); + } + + /** + * This method does not have any effect, as the {@link MemoryInputStream} cannot be closed. + */ + @Override + public void close() throws IOException { + // ignore + } +}