Browse Source

oss-store 中不再使用 tika 获取上传文件的 content-type, 使用 linux file 命令来代替

reghao 1 day ago
parent
commit
1cbeeca3fd

+ 7 - 2
oss-store/pom.xml

@@ -56,7 +56,7 @@
             <optional>true</optional>
         </dependency>
 
-        <dependency>
+        <!--<dependency>
             <groupId>org.apache.tika</groupId>
             <artifactId>tika-core</artifactId>
             <version>3.2.2</version>
@@ -65,7 +65,7 @@
             <groupId>org.apache.tika</groupId>
             <artifactId>tika-parsers-standard-package</artifactId>
             <version>3.2.2</version>
-        </dependency>
+        </dependency>-->
 
         <dependency>
             <groupId>io.netty</groupId>
@@ -79,6 +79,11 @@
             <version>3.3.6</version>
         </dependency>
 
+        <dependency>
+            <groupId>commons-io</groupId>
+            <artifactId>commons-io</artifactId>
+            <version>2.19.0</version>
+        </dependency>
         <dependency>
             <groupId>commons-codec</groupId>
             <artifactId>commons-codec</artifactId>

+ 3 - 13
oss-store/src/main/java/cn/reghao/oss/store/disk/HddFlushService.java

@@ -4,7 +4,6 @@ import cn.reghao.jutil.jdk.thread.ThreadFactoryBuilder;
 import cn.reghao.oss.api.dto.rest.UploadDoneResult;
 import cn.reghao.oss.api.iface.ConsoleService;
 import cn.reghao.oss.store.config.OssStoreConfig;
-import cn.reghao.oss.store.util.FileUtil;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.stereotype.Service;
 
@@ -75,7 +74,6 @@ public class HddFlushService {
                     return;
                 }
 
-                String contentType = flushResult.contentType;
                 Path tmpFile = Path.of(hddTempPath);
                 String filePath = diskService.getHddDataPath(sha256sum);
                 Path finalPath = Path.of(filePath);
@@ -142,15 +140,7 @@ public class HddFlushService {
     private FlushResult moveAndChecksum(String src, String dest) throws Exception {
         MessageDigest digest = MessageDigest.getInstance("SHA-256");
         Path srcPath = Paths.get(src);
-        String detectedType = FileUtil.getContentType(src);
-
-        // 1. Tika 识别:利用 Path 获取,它内部会按需打开 Channel
-        // 只读文件头,速度极快,不影响后续搬运
-        /*try (TikaInputStream tikaStream = TikaInputStream.get(srcPath)) {
-            detectedType = TIKA.detect(tikaStream, new Metadata());
-        }*/
-
-        // 2. 核心搬运:使用 FileChannel + DirectBuffer
+        // 核心搬运:使用 FileChannel + DirectBuffer
         try (FileChannel srcChannel = FileChannel.open(srcPath, StandardOpenOption.READ);
              FileChannel destChannel = FileChannel.open(Paths.get(dest),
                      StandardOpenOption.CREATE, StandardOpenOption.WRITE, StandardOpenOption.READ)) {
@@ -177,7 +167,7 @@ public class HddFlushService {
 
         // 3. 转换为十六进制字符串
         String hex = bytesToHex(digest.digest());
-        return new FlushResult(hex, detectedType);
+        return new FlushResult(hex);
     }
 
     private String bytesToHex(byte[] bytes) {
@@ -194,5 +184,5 @@ public class HddFlushService {
     }
 
     // --- 结果包装 ---
-    public record FlushResult(String sha256sum, String contentType) {}
+    public record FlushResult(String sha256sum) {} // Return value of moveAndChecksum: carries the hex string built from its SHA-256 digest.
 }

+ 1 - 1
oss-store/src/main/java/cn/reghao/oss/store/handler/OssMultipartUploadHandler.java

@@ -299,7 +299,7 @@ public class OssMultipartUploadHandler extends SimpleChannelInboundHandler<HttpO
             uploadResult.setUploadStatus(UploadStatus.FLUSHING.getCode());
             consoleService.registerAndBind(uploadResult);
 
-            // 2. 触发合并逻辑 (合并 SSD 上的碎片)
+            // 2. 把文件从 ssd 迁移到 hdd
             hddFlushService.triggerFlush(currentUploadId, clientSha256sum);
             // 3. 清理内存位图
             UploadProgressManager.remove(currentUploadId);

+ 40 - 4
oss-store/src/main/java/cn/reghao/oss/store/util/FileUtil.java

@@ -1,11 +1,14 @@
 package cn.reghao.oss.store.util;
 
 import lombok.extern.slf4j.Slf4j;
-import org.apache.tika.Tika;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
+//import org.apache.tika.Tika;
+//import org.apache.tika.io.TikaInputStream;
+//import org.apache.tika.metadata.Metadata;
 
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
 import java.nio.file.Path;
+import java.util.concurrent.TimeUnit;
 
 /**
  * @author reghao
@@ -13,7 +16,8 @@ import java.nio.file.Path;
  */
 @Slf4j
 public class FileUtil {
-    private static final Tika tika = new Tika();
+    private static final String fileCmd = "/usr/bin/file";
+    /*private static final Tika tika = new Tika();
 
     public static String getContentType(String filePath) {
         String contentType = "application/octet-stream";
@@ -23,6 +27,38 @@ public class FileUtil {
             log.error(e.getMessage());
         }
 
+        return contentType;
+    }*/
+
+    public static String getContentType(String filePath) { // Detects the MIME type of filePath by running the external `file` command.
+        String contentType = "application/octet-stream"; // Fallback returned whenever detection does not yield a usable value.
+        Process process = null;
+        try {
+            ProcessBuilder pb = new ProcessBuilder(fileCmd, "--mime-type", "-b", filePath);
+            // Key point: merge stderr into stdout so that, on success or failure, a single reader sees all output.
+            pb.redirectErrorStream(true);
+            process = pb.start();
+            try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
+                String line = reader.readLine(); // Only the first line of the command's output is consumed.
+                // Guard against null (no output at all) and trim surrounding whitespace up front.
+                String mimeType = (line != null) ? line.trim() : "";
+                // Bound the wait for process exit to 3 seconds. NOTE(review): readLine() above blocks before this timeout starts, so a process that hangs without writing output is not covered.
+                boolean finished = process.waitFor(3, TimeUnit.SECONDS);
+                if (finished && process.exitValue() == 0 && !mimeType.isEmpty()) {
+                    // Reject error text printed by the file command itself (e.g. "cannot open...").
+                    if (!mimeType.contains("cannot open") && !mimeType.contains("No such file")) {
+                        return mimeType;
+                    }
+                }
+            }
+        } catch (Exception e) {
+            log.error(e.getMessage()); // NOTE(review): only the message is logged (no stack trace); an InterruptedException from waitFor also lands here and the interrupt flag is not restored.
+        } finally {
+            if (process != null) {
+                // Make sure the child process's system resources are released.
+                process.destroy();
+            }
+        }
         return contentType;
     }