镇江市文章资讯

基于Node.js+DeepSeek打造一个智能档案归档系统

2026-03-29 13:40:02 浏览次数:1
详细信息

一、系统架构设计

核心模块

智能档案归档系统
├── 后端服务 (Node.js + Express/Koa)
├── AI处理层 (DeepSeek API集成)
├── 文件存储层
├── 数据库层 (MongoDB/PostgreSQL)
└── 前端界面 (Vue/React)

二、技术栈

{
  "后端": "Node.js + Express",
  "AI模型": "DeepSeek API",
  "数据库": "MongoDB (存储元数据) + MinIO (文件存储)",
  "文件处理": "Multer + PDF.js + Tesseract.js",
  "搜索": "Elasticsearch (可选)",
  "前端": "Vue.js + Element UI"
}

三、核心功能实现

1. 后端服务搭建

// server.js
const express = require('express');
const multer = require('multer');
const { DeepSeekAI } = require('deepseek-api');
const fs = require('fs');
const path = require('path');

// HTTP application that the archive routes below are registered on.
const app = express();

// Multipart upload middleware, configured with 'uploads/' as its destination.
const uploadOptions = { dest: 'uploads/' };
const upload = multer(uploadOptions);

// DeepSeek client settings. The API key is read from the DEEPSEEK_API_KEY
// environment variable instead of being hard-coded in the source.
const deepseekOptions = {
  apiKey: process.env.DEEPSEEK_API_KEY,
  baseURL: 'https://api.deepseek.com'
};
const deepseek = new DeepSeekAI(deepseekOptions);

/**
 * Parses a model reply as JSON, tolerating an optional Markdown code fence.
 *
 * Chat models often wrap JSON in ```json ... ``` fences even when asked for
 * raw JSON, which makes a bare JSON.parse throw.
 *
 * @param {string} reply - Raw text of the model's message.
 * @returns {*} The parsed JSON value.
 * @throws {Error} If the reply is not valid JSON once the fence is removed.
 */
function parseAnalysisReply(reply) {
  const text = String(reply == null ? '' : reply).trim();
  const fenced = text.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i);
  const payload = fenced ? fenced[1] : text;

  try {
    return JSON.parse(payload);
  } catch (err) {
    throw new Error(`DeepSeek returned a non-JSON analysis: ${err.message}`, { cause: err });
  }
}

/**
 * Extracts the text of a document and asks DeepSeek to analyse it.
 *
 * @param {string} filePath - Path of the file to analyse.
 * @param {string} fileType - MIME type of the file, forwarded to extractText.
 * @returns {Promise<Object>} The analysis object parsed from the model reply
 *   (requested keys: documentType, entities, topic, summary, tags).
 * @throws {Error} If no text could be extracted or the reply is not JSON.
 */
async function analyzeDocument(filePath, fileType) {
  // 1. Extract the text content.
  const textContent = await extractText(filePath, fileType);

  // Without this guard an unsupported file would be sent to the model as the
  // literal string "undefined".
  if (typeof textContent !== 'string' || textContent.trim() === '') {
    throw new Error(`No text could be extracted from ${filePath} (${fileType})`);
  }

  // 2. Analyse with DeepSeek. The prompt states the JSON shape explicitly
  // because the reply is parsed as JSON below.
  const prompt = `
    请分析以下文档内容,提取以下信息:
    1. 文档类型(合同、报告、发票、简历等)
    2. 关键实体(人名、公司名、日期、金额等)
    3. 主题分类
    4. 生成摘要(100字内)
    5. 建议的标签(3-5个)

    请只返回一个JSON对象,不要包含任何其他文字,格式如下:
    { "documentType": "文档类型", "entities": ["实体"], "topic": "主题分类", "summary": "摘要", "tags": ["标签"] }

    文档内容:
    ${textContent}
  `;

  const response = await deepseek.chat.completions.create({
    model: "deepseek-chat",
    messages: [{ role: "user", content: prompt }],
    temperature: 0.3,
    // Ask the API for JSON output; the prompt above contains the required
    // "JSON" keyword and an example of the expected shape.
    response_format: { type: "json_object" }
  });

  return parseAnalysisReply(response.choices[0].message.content);
}

// Archive upload endpoint: analyses the uploaded file, classifies it, extracts
// keywords, stores the record, and returns the stored id with the analysis.
app.post('/api/archive/upload', upload.single('file'), async (req, res) => {
  const file = req.file;

  // req.file is undefined when the request has no "file" part. Answer with a
  // client error instead of letting `file.path` throw and surface as a 500.
  if (!file) {
    return res.status(400).json({ error: 'No file uploaded (expected form field "file")' });
  }

  try {
    const metadata = req.body || {};

    // 1. Analyse the document.
    const analysis = await analyzeDocument(file.path, file.mimetype);

    // 2 + 3. Classification and keyword extraction both depend only on the
    // analysis, so they run concurrently.
    const [category, keywords] = await Promise.all([
      classifyDocument(analysis),
      extractKeywords(analysis)
    ]);

    // 4. Build the record to persist.
    // NOTE(review): confirm these keys match ArchiveModel's schema; Mongoose
    // drops keys that the schema does not declare (strict mode).
    // NOTE(review): uploadedBy comes straight from the request body; confirm
    // it should not be taken from the authenticated session instead.
    const archiveRecord = {
      filename: file.originalname,
      filepath: file.path,
      filesize: file.size,
      filetype: file.mimetype,
      category: category,
      metadata: {
        ...analysis,
        keywords: keywords,
        uploadDate: new Date(),
        uploadedBy: metadata.userId
      }
    };

    // Save to MongoDB. The generated _id lives on the document returned by
    // create(), not on the plain object passed in.
    const savedRecord = await ArchiveModel.create(archiveRecord);

    res.json({
      success: true,
      data: {
        id: savedRecord._id,
        analysis: analysis,
        category: category
      }
    });

  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

// Smart search endpoint: asks DeepSeek to interpret the search text, then
// passes the model response and the raw query to semanticSearch.
app.get('/api/archive/search', async (req, res) => {
  const { query } = req.query;

  // A missing parameter yields undefined and a repeated one yields an array;
  // neither should be interpolated into the prompt.
  if (typeof query !== 'string' || query.trim() === '') {
    return res.status(400).json({ error: 'Query parameter "query" is required' });
  }

  // Without this try/catch a rejected promise in an async handler is not
  // routed to an error response, leaving the request hanging.
  try {
    // Use DeepSeek to interpret the search intent.
    const searchIntent = await deepseek.chat.completions.create({
      model: "deepseek-chat",
      messages: [{
        role: "user",
        content: `用户搜索:"${query}",请分析搜索意图并提取关键词`
      }]
    });

    // Run the semantic search.
    const results = await semanticSearch(searchIntent, query);

    res.json(results);
  } catch (error) {
    // Same error shape as the upload endpoint.
    res.status(500).json({ error: error.message });
  }
});

2. 智能分类模块

// classification.js
/**
 * Parses the classifier's model reply as JSON, tolerating an optional
 * Markdown code fence (```json ... ```) around the payload.
 *
 * @param {string} reply - Raw text of the model's message.
 * @returns {*} The parsed JSON value.
 * @throws {Error} If the reply is not valid JSON once the fence is removed.
 */
function parseClassificationReply(reply) {
  const text = String(reply == null ? '' : reply).trim();
  const fenced = text.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i);
  const payload = fenced ? fenced[1] : text;

  try {
    return JSON.parse(payload);
  } catch (err) {
    throw new Error(`DeepSeek returned a non-JSON classification: ${err.message}`, { cause: err });
  }
}

/**
 * Assigns a document to one of a fixed list of archive categories by asking
 * DeepSeek to choose based on a prior analysis result.
 */
class SmartArchiveClassifier {
  constructor() {
    // Categories offered to the model in the prompt.
    this.categories = [
      '财务文档', '人事档案', '合同协议',
      '技术文档', '行政文件', '项目报告'
    ];
  }

  /**
   * Classifies a document from its analysis result.
   *
   * @param {Object} analysis - Analysis result; serialised into the prompt.
   * @returns {Promise<Object>} The parsed reply; the prompt requests the
   *   shape { "category": string, "confidence": number }.
   * @throws {Error} If the model reply cannot be parsed as JSON.
   */
  async classifyDocument(analysis) {
    const prompt = `
      根据以下分析结果,将文档分类到以下类别之一:
      ${this.categories.join(', ')}

      分析结果:
      ${JSON.stringify(analysis, null, 2)}

      返回格式:{ "category": "类别名", "confidence": 0.95 }
    `;

    const response = await deepseek.chat.completions.create({
      model: "deepseek-chat",
      messages: [{ role: "user", content: prompt }],
      temperature: 0.1
    });

    return parseClassificationReply(response.choices[0].message.content);
  }
}

3. 自动标签生成

// tagGenerator.js
/**
 * Parses the tag generator's model reply as JSON, tolerating an optional
 * Markdown code fence (```json ... ```) around the payload.
 *
 * @param {string} reply - Raw text of the model's message.
 * @returns {*} The parsed JSON value.
 * @throws {Error} If the reply is not valid JSON once the fence is removed.
 */
function parseTagReply(reply) {
  const text = String(reply == null ? '' : reply).trim();
  const fenced = text.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i);
  const payload = fenced ? fenced[1] : text;

  try {
    return JSON.parse(payload);
  } catch (err) {
    throw new Error(`DeepSeek returned non-JSON tags: ${err.message}`, { cause: err });
  }
}

/**
 * Generates weighted tags for a document by prompting DeepSeek with the
 * document's summary, key entities and type.
 */
class AutoTagGenerator {
  /**
   * @param {string} content - Currently unused; kept so existing callers that
   *   pass the document text keep working.
   * @param {Object} analysis - Analysis result; `summary`, `entities` and
   *   `documentType` are read from it.
   * @returns {Promise<*>} The parsed reply; the prompt requests a JSON array
   *   of tags with weights.
   * @throws {Error} If the model reply cannot be parsed as JSON.
   */
  async generateTags(content, analysis) {
    // entities may be absent or hold objects rather than strings; calling
    // .join directly would throw or render "[object Object]".
    const entityList = Array.isArray(analysis.entities) ? analysis.entities : [];
    const entityText = entityList
      .map((entity) => (typeof entity === 'string' ? entity : JSON.stringify(entity)))
      .join(', ');

    const prompt = `
      为以下文档生成智能标签(3-8个):
      1. 基于内容主题
      2. 基于文档类型
      3. 基于关键实体
      4. 考虑档案管理需求

      文档信息:
      内容摘要:${analysis.summary}
      关键实体:${entityText}
      文档类型:${analysis.documentType}

      要求:返回JSON数组格式,包含标签和权重
    `;

    const response = await deepseek.chat.completions.create({
      model: "deepseek-chat",
      messages: [{ role: "user", content: prompt }]
    });

    return parseTagReply(response.choices[0].message.content);
  }
}

4. 文件处理模块

// fileProcessor.js
const pdf = require('pdf-parse');
const tesseract = require('tesseract.js');

/**
 * Extracts plain text from uploaded files, dispatching on MIME type.
 *
 * NOTE(review): this class uses `fs`, `pdf` and `tesseract` from the enclosing
 * module scope. If it lives in its own module, that module must also
 * `require('fs')`.
 */
class FileProcessor {
  /**
   * Extracts text from a file according to its MIME type.
   *
   * @param {string} filePath - Path of the file to read.
   * @param {string} mimeType - MIME type used to pick the extractor.
   * @returns {Promise<string>} The extracted text.
   * @throws {Error} If the MIME type is missing or not supported.
   */
  async extractText(filePath, mimeType) {
    if (typeof mimeType !== 'string' || mimeType === '') {
      throw new Error(`Cannot extract text from ${filePath}: missing MIME type`);
    }

    if (mimeType === 'application/pdf') {
      return this.extractFromPDF(filePath);
    }
    if (mimeType.includes('image/')) {
      return this.extractFromImage(filePath);
    }
    // NOTE(review): application/msword is a binary format; decoding it as
    // UTF-8 will not give clean text. Confirm whether a dedicated .doc parser
    // should handle it instead.
    if (mimeType.includes('text/') || mimeType.includes('application/msword')) {
      return this.extractFromText(filePath);
    }

    // Fail loudly rather than returning undefined to the caller.
    throw new Error(`Unsupported MIME type for text extraction: ${mimeType}`);
  }

  /**
   * Reads a PDF file and returns the text reported by the PDF parser.
   *
   * @param {string} filePath - Path of the PDF file.
   * @returns {Promise<string>} The `text` property of the parser result.
   */
  async extractFromPDF(filePath) {
    // Async read so a large upload does not block the event loop.
    const dataBuffer = await fs.promises.readFile(filePath);
    const data = await pdf(dataBuffer);
    return data.text;
  }

  /**
   * Runs OCR on an image with the 'chi_sim+eng' language setting.
   *
   * @param {string} filePath - Path of the image file.
   * @returns {Promise<string>} The recognised text.
   */
  async extractFromImage(filePath) {
    const { data: { text } } = await tesseract.recognize(filePath, 'chi_sim+eng');
    return text;
  }

  /**
   * Reads a file and decodes it as UTF-8 text.
   *
   * @param {string} filePath - Path of the text file.
   * @returns {Promise<string>} The file contents.
   */
  async extractFromText(filePath) {
    return fs.promises.readFile(filePath, 'utf8');
  }
}

5. 数据库模型

// models/Archive.js
const mongoose = require('mongoose');

// Mongoose schema for an archived document: file information, AI analysis
// output, bookkeeping metadata, version history and per-user permissions.
const ArchiveSchema = new mongoose.Schema({
  filename: { type: String, required: true },
  originalName: String,
  fileType: String,
  fileSize: Number,
  storagePath: String,

  // AI analysis fields
  category: String,
  summary: String,
  tags: [{
    name: String,
    weight: Number
  }],
  entities: [{
    // A bare `type: String` here would be read by Mongoose as "each element
    // is a String", turning the whole field into an array of strings. Nesting
    // it declares a real property named `type` on each entity subdocument.
    type: { type: String },
    value: String,
    entityType: String // PERSON, COMPANY, DATE, AMOUNT, etc.
  }],

  // Metadata
  uploadDate: { type: Date, default: Date.now },
  uploadedBy: String,
  lastAccessed: Date,
  retentionPeriod: Number, // retention period in years

  // Version control
  versions: [{
    version: Number,
    modifiedAt: Date,
    modifiedBy: String,
    changes: String
  }],

  // Access control
  permissions: [{
    userId: String,
    role: String, // viewer, editor, admin
    grantedAt: Date
  }]
});

module.exports = mongoose.model('Archive', ArchiveSchema);

四、系统功能特性

1. 智能分类

2. 自动标签

3. 语义搜索

4. 安全控制

5. 批量处理

五、部署配置

# docker-compose.yml
version: '3.8'
services:
  # Node.js API service built from the local Dockerfile.
  api:
    build: .
    ports:
      - "3000:3000"
    environment:
      - DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY}
      - MONGODB_URI=mongodb://mongodb:27017/archive
      - REDIS_URL=redis://redis:6379
    depends_on:
      - mongodb
      - redis

  # NOTE(review): mongo:latest is unpinned; consider pinning a major version
  # so upgrades are deliberate.
  mongodb:
    image: mongo:latest
    volumes:
      - mongo_data:/data/db

  minio:
    image: minio/minio
    # The image needs an explicit server command and data directory; without
    # it the container does not start the object store. 9001 is published
    # below, so the console is bound to that port.
    command: server /data --console-address ":9001"
    ports:
      - "9000:9000"
      - "9001:9001"
    environment:
      # Credentials come from the environment (or an .env file) instead of
      # being committed; compose aborts if the password is not provided.
      - MINIO_ROOT_USER=${MINIO_ROOT_USER:-admin}
      - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:?MINIO_ROOT_PASSWORD must be set}
    volumes:
      - minio_data:/data

  redis:
    image: redis:alpine

volumes:
  mongo_data:
  minio_data:

六、前端示例

<!-- ArchiveUpload.vue -->
<template>
  <div class="upload-container">
    <!-- Drag-and-drop, multi-file uploader. uploadUrl, handleSuccess and
         beforeUpload are bound from the component's script section, which is
         not part of this snippet. -->
    <el-upload
      drag
      multiple
      :action="uploadUrl"
      :on-success="handleSuccess"
      :before-upload="beforeUpload"
    >
      <div class="upload-area">
        <i class="el-icon-upload"></i>
        <div>拖拽文件到此处或点击上传</div>
      </div>
    </el-upload>

    <!-- AI analysis panel: rendered only once analysisResult is set. Shows
         the category, the summary, and one tag per entry in
         analysisResult.tags. -->
    <div v-if="analysisResult" class="analysis-result">
      <h3>AI分析结果</h3>
      <el-tag>{{ analysisResult.category }}</el-tag>
      <p>{{ analysisResult.summary }}</p>
      <div class="tags">
        <!-- Tags with weight above 0.7 use the 'success' type, others 'info'. -->
        <el-tag 
          v-for="tag in analysisResult.tags" 
          :key="tag.name"
          :type="tag.weight > 0.7 ? 'success' : 'info'"
        >
          {{ tag.name }}
        </el-tag>
      </div>
    </div>
  </div>
</template>

七、优化建议

性能优化

安全增强

扩展功能

监控运维

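这个系统充分利用了DeepSeek的NLP能力,实现了档案管理的智能化。可以根据实际需求调整功能模块和复杂度。需要注意的是,上述代码是示例骨架:server.js 中调用的 extractText、classifyDocument、extractKeywords、semanticSearch 以及 ArchiveModel 需要按照对应小节的模块自行引入或实现后才能运行。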

相关推荐