智能档案归档系统
├── 后端服务 (Node.js + Express/Koa)
├── AI处理层 (DeepSeek API集成)
├── 文件存储层
├── 数据库层 (MongoDB/PostgreSQL)
└── 前端界面 (Vue/React)
{
"后端": "Node.js + Express",
"AI模型": "DeepSeek API",
"数据库": "MongoDB (存储元数据) + MinIO (文件存储)",
"文件处理": "Multer + pdf-parse + Tesseract.js",
"搜索": "Elasticsearch (可选)",
"前端": "Vue.js + Element UI"
}
// server.js
const express = require('express');
const multer = require('multer');
const { DeepSeekAI } = require('deepseek-api');
const fs = require('fs');
const path = require('path');
// Express application instance that the routes below are registered on.
const app = express();

// Upper bound for a single uploaded file, in bytes. Can be overridden with
// the MAX_UPLOAD_BYTES environment variable; falls back to 50 MiB when the
// variable is unset or is not a positive number. Without a limit, multer
// accepts arbitrarily large files from any client.
const maxUploadBytes = Number(process.env.MAX_UPLOAD_BYTES) > 0
  ? Number(process.env.MAX_UPLOAD_BYTES)
  : 50 * 1024 * 1024;

// Multipart upload middleware; files are written to the local "uploads/"
// directory.
const upload = multer({
  dest: 'uploads/',
  limits: { fileSize: maxUploadBytes },
});

// DeepSeek client configuration. The API key is taken from the
// DEEPSEEK_API_KEY environment variable.
const deepseek = new DeepSeekAI({
  apiKey: process.env.DEEPSEEK_API_KEY,
  baseURL: 'https://api.deepseek.com',
});
/**
 * Parses a JSON value out of a chat-model reply.
 *
 * Model replies are free text: the JSON is often wrapped in a Markdown code
 * fence or surrounded by a sentence of prose, either of which makes a plain
 * `JSON.parse` throw. This helper strips a surrounding fence and, if that is
 * still not valid JSON, retries on the outermost `{...}` / `[...]` span.
 *
 * @param {string} rawReply - Text content of the model message.
 * @returns {*} The parsed JSON value.
 * @throws {Error} If the reply is empty or contains no parseable JSON.
 */
function parseModelJson(rawReply) {
  if (typeof rawReply !== 'string' || rawReply.trim() === '') {
    throw new Error('Model returned an empty response');
  }

  const trimmed = rawReply.trim();
  const fenced = trimmed.match(/^```[a-zA-Z]*\s*([\s\S]*?)\s*```$/);
  const candidate = fenced ? fenced[1] : trimmed;

  try {
    return JSON.parse(candidate);
  } catch (parseError) {
    // Fall back to the outermost object/array embedded in surrounding prose.
    const start = candidate.search(/[[{]/);
    const closer = start !== -1 && candidate[start] === '[' ? ']' : '}';
    const end = candidate.lastIndexOf(closer);
    if (start === -1 || end <= start) {
      throw new Error('Model response is not valid JSON', { cause: parseError });
    }
    try {
      return JSON.parse(candidate.slice(start, end + 1));
    } catch (innerError) {
      throw new Error('Model response is not valid JSON', { cause: innerError });
    }
  }
}

/**
 * Smart document analysis: extracts the text of a file and asks DeepSeek for
 * a structured description of it.
 *
 * NOTE(review): no free function `extractText` is defined in the visible
 * source; `FileProcessor#extractText` has the same (path, mime type)
 * signature. Confirm how it is wired in.
 *
 * @param {string} filePath - Path of the file on disk.
 * @param {string} fileType - MIME type of the file.
 * @returns {Promise<Object>} The JSON object returned by the model.
 * @throws {Error} If no text can be extracted or the reply is not valid JSON.
 */
async function analyzeDocument(filePath, fileType) {
  // 1. Extract the text content.
  const textContent = await extractText(filePath, fileType);
  if (typeof textContent !== 'string' || textContent.trim() === '') {
    // Sending "undefined" or an empty document to the model only produces a
    // made-up analysis, so fail early instead.
    throw new Error(`No text could be extracted from file of type ${fileType}`);
  }

  // 2. Analyse it with DeepSeek. The prompt states the JSON shape explicitly,
  // because the reply is parsed as JSON below; the field names match what the
  // tag generator in this file reads (summary, entities, documentType).
  const prompt = [
    '',
    '请分析以下文档内容,提取以下信息:',
    '1. 文档类型(合同、报告、发票、简历等)',
    '2. 关键实体(人名、公司名、日期、金额等)',
    '3. 主题分类',
    '4. 生成摘要(100字内)',
    '5. 建议的标签(3-5个)',
    '请仅返回一个JSON对象(不要使用Markdown代码块),字段为:documentType、entities(数组)、category、summary、tags(数组)',
    '文档内容:',
    textContent,
    '',
  ].join('\n');

  const response = await deepseek.chat.completions.create({
    model: 'deepseek-chat',
    messages: [{ role: 'user', content: prompt }],
    temperature: 0.3,
  });

  return parseModelJson(response.choices[0].message.content);
}
// Archive upload endpoint.
//
// Accepts one multipart file in the "file" field, runs AI analysis on it and
// stores a record through ArchiveModel.
// Responses: 200 { success, data: { id, analysis, category } },
//            400 { error } when no file was sent,
//            500 { error } when analysis or persistence fails.
//
// NOTE(review): `classifyDocument` and `extractKeywords` are called as free
// functions but are not defined as such in the visible source (only
// `SmartArchiveClassifier#classifyDocument` is). Confirm where they come from.
// NOTE(review): the record below uses `filepath` / `filesize` / `filetype` /
// `metadata`, while the Mongoose schema in this file declares `storagePath` /
// `fileSize` / `fileType` and flat fields. Confirm which shape ArchiveModel
// expects.
app.post('/api/archive/upload', upload.single('file'), async (req, res) => {
  const file = req.file;
  if (!file) {
    // Without this guard `file.path` throws and the client gets a misleading 500.
    res.status(400).json({ error: 'No file uploaded (expected multipart field "file")' });
    return;
  }

  // Form fields sent alongside the file.
  // NOTE(review): `userId` is taken from the request body as-is — presumably
  // it should come from an authenticated session instead.
  const metadata = req.body || {};

  let savedRecord;
  let analysis;
  let category;
  try {
    // 1. Analyse the document.
    analysis = await analyzeDocument(file.path, file.mimetype);
    // 2. Classify it.
    category = await classifyDocument(analysis);
    // 3. Extract keywords.
    const keywords = await extractKeywords(analysis);

    // 4. Build the record and save it to MongoDB.
    const archiveRecord = {
      filename: file.originalname,
      filepath: file.path,
      filesize: file.size,
      filetype: file.mimetype,
      category: category,
      metadata: {
        ...analysis,
        keywords: keywords,
        uploadDate: new Date(),
        uploadedBy: metadata.userId,
      },
    };
    // `create` returns the persisted document; the plain input object never
    // receives an `_id`, so the id must be read from the returned value.
    savedRecord = await ArchiveModel.create(archiveRecord);
  } catch (error) {
    // Nothing was persisted, so the temp upload would be orphaned on disk.
    // Removal is best-effort: a cleanup failure is logged, not surfaced.
    fs.promises.unlink(file.path).catch((cleanupError) => {
      console.warn(`Failed to remove upload ${file.path}: ${cleanupError.message}`);
    });
    res.status(500).json({ error: error.message });
    return;
  }

  res.json({
    success: true,
    data: {
      id: savedRecord._id,
      analysis: analysis,
      category: category,
    },
  });
});
// Smart search endpoint.
//
// Reads the `query` query-string parameter, asks DeepSeek to interpret the
// search intent, then delegates to `semanticSearch`.
// Responses: 200 with whatever `semanticSearch` returns,
//            400 { error } when `query` is missing or blank,
//            500 { error } when the model call or the search fails.
app.get('/api/archive/search', async (req, res) => {
  const { query } = req.query;
  if (typeof query !== 'string' || query.trim() === '') {
    res.status(400).json({ error: 'Query parameter "query" is required' });
    return;
  }

  // A rejected promise in an async handler is not turned into a response
  // automatically, so failures are caught and answered here.
  try {
    // Ask DeepSeek to interpret the search intent.
    const searchIntent = await deepseek.chat.completions.create({
      model: 'deepseek-chat',
      messages: [{
        role: 'user',
        content: `用户搜索:"${query}",请分析搜索意图并提取关键词`,
      }],
    });

    // Run the semantic search.
    // NOTE(review): the whole completion response object is passed on, not
    // just the message text — confirm this is what `semanticSearch` expects.
    const results = await semanticSearch(searchIntent, query);
    res.json(results);
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});
// classification.js
/**
 * Classifies an analysed document into one of a fixed list of archive
 * categories by asking the DeepSeek chat model.
 */
class SmartArchiveClassifier {
  constructor() {
    // Candidate categories offered to the model. These strings are sent in
    // the prompt verbatim.
    this.categories = [
      '财务文档', '人事档案', '合同协议',
      '技术文档', '行政文件', '项目报告',
    ];
  }

  /**
   * Asks the model to pick a category for the given analysis result.
   *
   * @param {Object} analysis - Analysis result; serialised into the prompt.
   * @returns {Promise<Object>} The parsed model reply; the prompt requests
   *   the shape `{ category, confidence }`.
   * @throws {Error} If the reply is empty or contains no parseable JSON.
   */
  async classifyDocument(analysis) {
    const prompt = [
      '',
      '根据以下分析结果,将文档分类到以下类别之一:',
      this.categories.join(', '),
      '分析结果:',
      JSON.stringify(analysis, null, 2),
      '返回格式:{ "category": "类别名", "confidence": 0.95 }',
      '',
    ].join('\n');

    const response = await deepseek.chat.completions.create({
      model: 'deepseek-chat',
      messages: [{ role: 'user', content: prompt }],
      temperature: 0.1,
    });

    return SmartArchiveClassifier.parseJsonReply(response.choices[0].message.content);
  }

  /**
   * Parses JSON out of a model reply, tolerating a surrounding Markdown code
   * fence or surrounding prose (both make a plain `JSON.parse` throw).
   *
   * @param {string} rawReply - Text content of the model message.
   * @returns {*} The parsed JSON value.
   * @throws {Error} If the reply is empty or contains no parseable JSON.
   */
  static parseJsonReply(rawReply) {
    if (typeof rawReply !== 'string' || rawReply.trim() === '') {
      throw new Error('Model returned an empty response');
    }

    const trimmed = rawReply.trim();
    const fenced = trimmed.match(/^```[a-zA-Z]*\s*([\s\S]*?)\s*```$/);
    const candidate = fenced ? fenced[1] : trimmed;

    try {
      return JSON.parse(candidate);
    } catch (parseError) {
      // Retry on the outermost object/array embedded in the text.
      const start = candidate.search(/[[{]/);
      const closer = start !== -1 && candidate[start] === '[' ? ']' : '}';
      const end = candidate.lastIndexOf(closer);
      if (start === -1 || end <= start) {
        throw new Error('Model response is not valid JSON', { cause: parseError });
      }
      try {
        return JSON.parse(candidate.slice(start, end + 1));
      } catch (innerError) {
        throw new Error('Model response is not valid JSON', { cause: innerError });
      }
    }
  }
}
// tagGenerator.js
/**
 * Generates weighted tags for a document by asking the DeepSeek chat model.
 */
class AutoTagGenerator {
  /**
   * Builds a tagging prompt from the analysis result and returns the parsed
   * model reply.
   *
   * @param {string} content - Document content. Currently unused; kept so
   *   existing callers keep working.
   * @param {Object} analysis - Analysis result; `summary`, `entities` and
   *   `documentType` are read from it. Missing fields are rendered as empty.
   * @returns {Promise<*>} The parsed model reply; the prompt requests a JSON
   *   array of tags with weights.
   * @throws {Error} If the reply is empty or contains no parseable JSON.
   */
  async generateTags(content, analysis) {
    const { summary = '', entities, documentType = '' } = analysis || {};

    const prompt = [
      '',
      '为以下文档生成智能标签(3-8个):',
      '1. 基于内容主题',
      '2. 基于文档类型',
      '3. 基于关键实体',
      '4. 考虑档案管理需求',
      '文档信息:',
      `内容摘要:${summary}`,
      `关键实体:${AutoTagGenerator.describeEntities(entities)}`,
      `文档类型:${documentType}`,
      '要求:返回JSON数组格式,包含标签和权重',
      '',
    ].join('\n');

    const response = await deepseek.chat.completions.create({
      model: 'deepseek-chat',
      messages: [{ role: 'user', content: prompt }],
    });

    return AutoTagGenerator.parseJsonReply(response.choices[0].message.content);
  }

  /**
   * Renders the entity list for the prompt.
   *
   * Accepts plain strings as well as entity objects carrying a `value` field
   * (the shape used by the archive schema in this file). Anything that is not
   * an array yields an empty string instead of throwing.
   *
   * @param {*} entities - Entities from the analysis result.
   * @returns {string} Comma-separated entity text.
   */
  static describeEntities(entities) {
    if (!Array.isArray(entities)) {
      return '';
    }
    return entities
      .map((entity) => {
        if (entity == null) {
          return '';
        }
        if (typeof entity === 'object') {
          // Joining objects directly would print "[object Object]".
          return entity.value != null ? String(entity.value) : JSON.stringify(entity);
        }
        return String(entity);
      })
      .join(', ');
  }

  /**
   * Parses JSON out of a model reply, tolerating a surrounding Markdown code
   * fence or surrounding prose (both make a plain `JSON.parse` throw).
   *
   * @param {string} rawReply - Text content of the model message.
   * @returns {*} The parsed JSON value.
   * @throws {Error} If the reply is empty or contains no parseable JSON.
   */
  static parseJsonReply(rawReply) {
    if (typeof rawReply !== 'string' || rawReply.trim() === '') {
      throw new Error('Model returned an empty response');
    }

    const trimmed = rawReply.trim();
    const fenced = trimmed.match(/^```[a-zA-Z]*\s*([\s\S]*?)\s*```$/);
    const candidate = fenced ? fenced[1] : trimmed;

    try {
      return JSON.parse(candidate);
    } catch (parseError) {
      // Retry on the outermost array/object embedded in the text.
      const start = candidate.search(/[[{]/);
      const closer = start !== -1 && candidate[start] === '[' ? ']' : '}';
      const end = candidate.lastIndexOf(closer);
      if (start === -1 || end <= start) {
        throw new Error('Model response is not valid JSON', { cause: parseError });
      }
      try {
        return JSON.parse(candidate.slice(start, end + 1));
      } catch (innerError) {
        throw new Error('Model response is not valid JSON', { cause: innerError });
      }
    }
  }
}
// fileProcessor.js
const pdf = require('pdf-parse');
const tesseract = require('tesseract.js');
/**
 * Extracts plain text from uploaded files, dispatching on MIME type.
 */
class FileProcessor {
  /**
   * Extracts the text of a file.
   *
   * @param {string} filePath - Path of the file on disk.
   * @param {string} mimeType - MIME type reported for the file.
   * @returns {Promise<string>} The extracted text.
   * @throws {Error} If the MIME type is missing or not supported. Unsupported
   *   types are reported explicitly rather than resolving to `undefined`.
   */
  async extractText(filePath, mimeType) {
    const type = typeof mimeType === 'string' ? mimeType.toLowerCase() : '';

    if (type === 'application/pdf') {
      return this.extractFromPDF(filePath);
    }
    if (type.startsWith('image/')) {
      return this.extractFromImage(filePath);
    }
    // NOTE(review): legacy .doc (application/msword) is a binary format;
    // reading it as UTF-8 text will likely yield garbage. Consider a
    // dedicated parser.
    if (type.startsWith('text/') || type.includes('application/msword')) {
      return this.extractFromText(filePath);
    }

    throw new Error(`Unsupported file type: ${mimeType}`);
  }

  /**
   * Reads a PDF and returns the text reported by the `pdf` parser.
   *
   * @param {string} filePath - Path of the PDF file.
   * @returns {Promise<string>} Text content of the PDF.
   */
  async extractFromPDF(filePath) {
    // Async read: a synchronous read would block the event loop for the
    // whole duration of the disk access.
    const dataBuffer = await fs.promises.readFile(filePath);
    const data = await pdf(dataBuffer);
    return data.text;
  }

  /**
   * Runs OCR on an image with the Simplified Chinese + English language
   * models.
   *
   * @param {string} filePath - Path of the image file.
   * @returns {Promise<string>} Recognised text.
   */
  async extractFromImage(filePath) {
    const { data: { text } } = await tesseract.recognize(filePath, 'chi_sim+eng');
    return text;
  }

  /**
   * Reads a file as UTF-8 text.
   *
   * @param {string} filePath - Path of the text file.
   * @returns {Promise<string>} File contents decoded as UTF-8.
   */
  async extractFromText(filePath) {
    return fs.promises.readFile(filePath, 'utf8');
  }
}
// models/Archive.js
const mongoose = require('mongoose');
// Mongoose schema for one archived file: file facts, AI analysis output,
// bookkeeping metadata, version history and per-user permissions.
const ArchiveSchema = new mongoose.Schema({
  filename: { type: String, required: true },
  originalName: String,
  fileType: String,
  fileSize: Number,
  storagePath: String,

  // AI analysis fields
  category: String,
  summary: String,
  tags: [{
    name: String,
    weight: Number,
  }],
  entities: [{
    // `type` is a reserved key in Mongoose schema definitions: written as
    // `type: String` it would declare the whole array element as a String
    // and drop `value` / `entityType`. Nesting it declares a real field
    // named "type".
    type: { type: String },
    value: String,
    entityType: String, // PERSON, COMPANY, DATE, AMOUNT, etc.
  }],

  // Metadata
  uploadDate: { type: Date, default: Date.now },
  uploadedBy: String,
  lastAccessed: Date,
  retentionPeriod: Number, // retention period, in years

  // Version control
  versions: [{
    version: Number,
    modifiedAt: Date,
    modifiedBy: String,
    changes: String,
  }],

  // Access control
  permissions: [{
    userId: String,
    role: String, // viewer, editor, admin
    grantedAt: Date,
  }],
});

module.exports = mongoose.model('Archive', ArchiveSchema);
# docker-compose.yml
version: '3.8'

services:
  # Application API; talks to MongoDB and Redis over the compose network.
  api:
    build: .
    ports:
      - "3000:3000"
    environment:
      - DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY}
      - MONGODB_URI=mongodb://mongodb:27017/archive
      - REDIS_URL=redis://redis:6379
    depends_on:
      - mongodb
      - redis

  mongodb:
    image: mongo:latest
    volumes:
      - mongo_data:/data/db

  # Object storage. 9000 is the S3 API port, 9001 the web console.
  minio:
    image: minio/minio
    # The image needs an explicit "server" command to start serving /data;
    # the console is pinned to 9001 to match the published port.
    command: server /data --console-address ":9001"
    ports:
      - "9000:9000"
      - "9001:9001"
    environment:
      # Defaults keep the stack runnable out of the box; override both
      # variables for anything beyond local development.
      - MINIO_ROOT_USER=${MINIO_ROOT_USER:-admin}
      - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-password123}
    volumes:
      - minio_data:/data

  redis:
    image: redis:alpine

# Named volumes so database and object data survive container recreation.
volumes:
  mongo_data:
  minio_data:
<!-- ArchiveUpload.vue -->
<template>
  <div class="upload-container">
    <!--
      Drag-and-drop uploader that accepts multiple files.
      `uploadUrl`, `handleSuccess` and `beforeUpload` are expected to come
      from the component script, which is not part of this template.
    -->
    <el-upload
      drag
      multiple
      :action="uploadUrl"
      :on-success="handleSuccess"
      :before-upload="beforeUpload"
    >
      <div class="upload-area">
        <i class="el-icon-upload"></i>
        <div>拖拽文件到此处或点击上传</div>
      </div>
    </el-upload>

    <!-- Analysis panel: rendered only once `analysisResult` is set. -->
    <div v-if="analysisResult" class="analysis-result">
      <h3>AI分析结果</h3>
      <el-tag>{{ analysisResult.category }}</el-tag>
      <p>{{ analysisResult.summary }}</p>

      <!-- One tag per entry; tags weighted above 0.7 are highlighted. -->
      <div class="tags">
        <el-tag
          v-for="tag in analysisResult.tags"
          :key="tag.name"
          :type="tag.weight > 0.7 ? 'success' : 'info'"
        >
          {{ tag.name }}
        </el-tag>
      </div>
    </div>
  </div>
</template>
性能优化
安全增强
扩展功能
监控运维
这个系统充分利用了DeepSeek的NLP能力,实现了档案管理的智能化。可以根据实际需求调整功能模块和复杂度。需要我详细解释某个模块吗?