Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 68 additions & 60 deletions pom.xml
Comment thread
welschsn marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -49,40 +49,29 @@
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>

<zip4j.version>2.11.5</zip4j.version>
<slf4j.version>1.7.36</slf4j.version>

<poi.version>5.3.0</poi.version>
<rtfparserkit.version>1.16.0</rtfparserkit.version>
<commons.io.version>2.18.0</commons.io.version>
<commons-compress.version>1.27.1</commons-compress.version>
<opennlp-tools.version>1.9.4</opennlp-tools.version>

<pdfbox.version>2.0.32</pdfbox.version>

<bouncycastle.version>1.84</bouncycastle.version>
<commons-compress.version>1.28.0</commons-compress.version>
<commons.io.version>2.22.0</commons.io.version>
<jaxb-api.version>2.3.1</jaxb-api.version>
<jaxb-runtime.version>2.3.9</jaxb-runtime.version>

<pdfbox.version>2.0.36</pdfbox.version>
<poi.version>5.3.0</poi.version>
<rtfparserkit.version>1.16.0</rtfparserkit.version>
<slf4j.version>2.0.17</slf4j.version>
<zip4j.version>2.11.6</zip4j.version>
<!-- testing -->
<junit.version>5.11.3</junit.version>
<hamcrest.version>3.0</hamcrest.version>
<jmock-junit5.version>2.13.1</jmock-junit5.version>
<log4j.version>2.24.2</log4j.version>

<!-- sonarcloud -->
<sonar.projectKey>levigo_filetype-analyzer</sonar.projectKey>
<sonar.organization>levigo</sonar.organization>
<sonar.host.url>https://sonarcloud.io</sonar.host.url>

<bouncycastle.version>1.79</bouncycastle.version>
<junit.version>5.11.4</junit.version>
<log4j.version>2.26.0</log4j.version>
</properties>

<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.12.1</version>
<version>3.15.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
Expand All @@ -92,7 +81,7 @@
<!-- Calculating the test coverage -->
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<version>0.8.11</version>
<version>0.8.14</version>
<executions>
<execution>
<id>pre-unit-test</id>
Expand All @@ -116,7 +105,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>3.2.3</version>
<version>3.5.5</version>
<configuration>
<!-- Sets the VM argument line as defined by the JaCoCo plugin. -->
<argLine>${jacocoAgentArgLine}</argLine>
Expand All @@ -132,12 +121,12 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-enforcer-plugin</artifactId>
<version>3.5.0</version>
<version>3.6.2</version>
<dependencies>
<dependency>
<groupId>org.codehaus.mojo</groupId>
<artifactId>extra-enforcer-rules</artifactId>
<version>1.8.0</version>
<version>1.12.0</version>
</dependency>
</dependencies>
<executions>
Expand Down Expand Up @@ -181,7 +170,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-release-plugin</artifactId>
<version>3.1.1</version>
<version>3.3.0</version>
<configuration>
<username>git</username>
<password/>
Expand All @@ -199,38 +188,74 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>3.3.0</version>
<version>3.5.0</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-clean-plugin</artifactId>
<version>3.3.2</version>
<version>3.5.0</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-install-plugin</artifactId>
<version>3.1.1</version>
<version>3.1.4</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<version>3.12.1</version>
<version>3.21.0</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
<version>3.3.1</version>
<version>3.5.0</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-deploy-plugin</artifactId>
<version>3.1.2</version>
<version>3.1.4</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>3.6.3</version>
<version>3.12.0</version>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>versions-maven-plugin</artifactId>
<version>2.21.0</version>
<configuration>
<ruleSet>
<ignoreVersions>
<ignoreVersion>
<!-- can be either: 'exact' (default) or 'regex' -->
<type>regex</type>
<version>(.+-SNAPSHOT|.+-M\d|.+-(rc|RC).*)</version>
</ignoreVersion>
<ignoreVersion>
<type>regex</type>
<version>.+-(alpha|beta).*</version>
</ignoreVersion>
<ignoreVersion>
<type>regex</type>
<version>.+does-not-exist.*</version>
</ignoreVersion>
<ignoreVersion>
<type>regex</type>
<version>.*-nx[0-9]+</version>
</ignoreVersion>
<ignoreVersion>
<type>regex</type>
<version>.*-atlassian.*</version>
</ignoreVersion>
<ignoreVersion>
<type>regex</type>
<version>.+-jdk5.*</version>
</ignoreVersion>
</ignoreVersions>
</ruleSet>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
Expand Down Expand Up @@ -293,7 +318,7 @@
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<artifactId>log4j-slf4j2-impl</artifactId>
<version>${log4j.version}</version>
<scope>test</scope>
</dependency>
Expand Down Expand Up @@ -382,25 +407,8 @@
<version>${commons-compress.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.opennlp</groupId>
<artifactId>opennlp-tools</artifactId>
<version>${opennlp-tools.version}</version>
<scope>compile</scope>
</dependency>
</dependencies>

<distributionManagement>
<repository>
<id>maven2.releases.levigo.de</id>
<url>https://levigo.de/maven2/content/repositories/levigo-releases/</url>
</repository>
<snapshotRepository>
<id>maven2.snapshots.levigo.de</id>
<url>https://levigo.de/maven2/content/repositories/levigo-snapshots/</url>
</snapshotRepository>
</distributionManagement>

<profiles>
<!-- Activate using the release property: mvn clean install -Prelease -->
<profile>
Expand All @@ -415,22 +423,22 @@
<plugins>
<!-- To release to Maven central -->
<plugin>
<groupId>org.sonatype.plugins</groupId>
<artifactId>nexus-staging-maven-plugin</artifactId>
<version>1.6.13</version>
<groupId>org.sonatype.central</groupId>
<artifactId>central-publishing-maven-plugin</artifactId>
<version>0.10.0</version>
<extensions>true</extensions>
<configuration>
<serverId>ossrh</serverId>
<nexusUrl>https://s01.oss.sonatype.org/</nexusUrl>
<autoReleaseAfterClose>true</autoReleaseAfterClose>
<publishingServerId>central</publishingServerId>
<autoPublish>true</autoPublish>
<waitUntil>uploaded</waitUntil>
</configuration>
</plugin>

<!-- To attach the sources JAR -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>3.3.0</version>
<version>3.4.0</version>
<executions>
<execution>
<id>attach-sources</id>
Expand Down Expand Up @@ -462,7 +470,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
<version>3.0.1</version>
<version>3.1.0</version>
<executions>
<execution>
<id>sign-artifacts</id>
Expand Down
32 changes: 18 additions & 14 deletions src/main/java/org/jadice/filetype/Analyzer.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,13 @@
import javax.xml.bind.ValidationEventHandler;
import javax.xml.bind.ValidationEventLocator;

import opennlp.tools.util.model.UncloseableInputStream;
import org.apache.commons.io.FilenameUtils;
import org.jadice.filetype.database.Database;
import org.jadice.filetype.database.DescriptionAction;
import org.jadice.filetype.database.Type;
import org.jadice.filetype.io.MemoryInputStream;
import org.jadice.filetype.io.RandomAccessFileInputStream;
import org.jadice.filetype.io.SeekableInputStream;
import org.jadice.filetype.io.UncloseableMemoryInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -228,17 +227,21 @@ public Map<String, Object> analyze(final InputStream sis, final AnalysisListener
public Map<String, Object> analyze(final InputStream is, final AnalysisListener listener, final String fileName)
throws IOException {
Map<String, Object> result = new HashMap<>();


// POI (3.1-Final) closes the stream during analysis of office files - use an uncloseable stream wrapper
final UncloseableInputStream uis = new UncloseableInputStream(is);
final UncloseableSeekableInputStreamWrapper usis = new UncloseableSeekableInputStreamWrapper(new MemoryInputStream(uis));
usis.lockClose(); // and don't unlock later as POI attempts to close asynchronously!

final AnalysisListener effectiveListener = listener != null ? listener : DEFAULT_LISTENER;
// POI may close streams during analysis; shield callers by using an uncloseable, seekable wrapper.
// If the input is already seekable, avoid buffering the full stream in memory.
final SeekableInputStream baseStream;
if (is instanceof SeekableInputStream) {
baseStream = (SeekableInputStream) is;
} else {
baseStream = new UncloseableMemoryInputStream(is);
}
final UncloseableSeekableInputStreamWrapper usis = new UncloseableSeekableInputStreamWrapper(baseStream);
usis.lockClose(); // do not unlock later as POI may attempt to close asynchronously
final String sanitizedFileName = fileName != null ? fileName.replaceAll("[:\\\\/*?|<>]", "_") : null;
String extension = FilenameUtils.getExtension(sanitizedFileName);

Context ctx = new Context(usis, result, listener, locale, extension);
Context ctx = new Context(usis, result, effectiveListener, locale, extension);

database.analyze(ctx);

Expand Down Expand Up @@ -267,12 +270,13 @@ public Map<String, Object> analyze(final File file, final AnalysisListener liste
SeekableInputStream sis = new RandomAccessFileInputStream(file);
try {
String fileName = file.getName();
return analyze(sis, null, fileName);
return analyze(sis, listener, fileName);
} finally {
try {
sis.close();
} catch (IOException e) {
listener.error(this, "Exception closing RandomAccessFileInputStream", e);
final AnalysisListener effectiveListener = listener != null ? listener : DEFAULT_LISTENER;
effectiveListener.error(this, "Exception closing RandomAccessFileInputStream", e);
}
}
}
Expand All @@ -289,13 +293,13 @@ public Map<String, Object> analyze(final SeekableInputStream sis) throws IOExcep
}


public Map<String, Object> analyzeWithFilename(final SeekableInputStream sis,final String fileName) throws IOException {
public Map<String, Object> analyzeWithFilename(final SeekableInputStream sis, final String fileName) throws IOException {
return analyze(sis, DEFAULT_LISTENER, fileName);
}

/**
* Analyze the stream supplied via an {@link InputStream}. <br>
* Caveat: the data will be buffered in memory. If you don't like this, supply a
* Caveat: non-seekable streams may be buffered in memory. If you don't like this, supply a
* {@link SeekableInputStream} implementation or a {@link File} instead.
*
* @param is
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package org.jadice.filetype.io;

import java.io.IOException;
import java.io.InputStream;


/**
* A {@link MemoryInputStream} whose {@code close()} is a no-op.
* <p>
* Apart from the constructor, which forwards to the superclass, the only member overridden here is
* {@code close()}; everything else is inherited from {@link MemoryInputStream}. Use this variant when
* the stream is handed to code that may call {@code close()} although the stream must stay usable.
*/
public class UncloseableMemoryInputStream extends MemoryInputStream {
/**
* Creates the stream by passing {@code is} unchanged to the {@link MemoryInputStream} constructor.
*
* @param is the source stream, forwarded to the superclass constructor
* @throws IOException propagated from the superclass constructor
*/
public UncloseableMemoryInputStream(final InputStream is) throws IOException {
super(is);
}

/**
* Does nothing: the call is deliberately ignored and is not forwarded to {@code super.close()},
* so closing this stream has no effect.
*
* @throws IOException never thrown by this implementation; the clause is kept in the signature
*/
@Override
public void close() throws IOException {
// intentionally empty - close requests are ignored
}
}
Loading