feat(huggingface): implement direct upload for large files (>20MB)

- Add frontend direct upload to HuggingFace S3, bypassing CF Workers limits
- Add /api/huggingface/getUploadUrl endpoint to get LFS upload URLs
- Add /api/huggingface/commitUpload endpoint to commit file references
- Support multipart upload for very large files
- SHA256 computed in frontend to avoid CF Workers CPU timeout
- Small files (<20MB) still use proxy upload through CF Workers
This commit is contained in:
axibayuit
2025-12-30 20:42:38 +08:00
parent fd92a024fd
commit e973c65e9c
14 changed files with 318 additions and 3 deletions

View File

@@ -0,0 +1,142 @@
/**
* HuggingFace 大文件提交 API
*
* 在前端直接上传文件到 S3 后,调用此 API 提交 LFS 文件引用
*/
import { HuggingFaceAPI } from '../../utils/huggingfaceAPI.js';
import { getUploadConfig } from '../../utils/sysConfig.js';
import { getDatabase } from '../../utils/databaseAdapter.js';
import { moderateContent, endUpload } from '../../upload/uploadTools.js';
/**
 * Build a JSON `Response` with the given payload and HTTP status.
 *
 * @param {*} payload - Value serialized with `JSON.stringify` as the body.
 * @param {number} [status=200] - HTTP status code of the response.
 * @returns {Response} Response carrying a `Content-Type: application/json` header.
 */
function jsonResponse(payload, status = 200) {
  return new Response(JSON.stringify(payload), {
    status,
    headers: { 'Content-Type': 'application/json' }
  });
}

/**
 * POST handler that commits an LFS file reference to a HuggingFace repo after
 * the frontend has uploaded the file content directly, then records the file
 * in the database.
 *
 * Expected JSON body fields:
 * - `fullId` (required): key under which the record is stored in the database.
 * - `filePath` (required): path of the file inside the HuggingFace repo.
 * - `sha256` (required): SHA-256 of the file, forwarded to `commitLfsFile`.
 * - `fileSize` (required): file size in bytes; must be a positive finite number
 *   (numeric strings are accepted and coerced).
 * - `fileName` (optional): display name; falls back to `fullId`.
 * - `channelName` (optional): name of the configured channel to use; the first
 *   configured channel is used when it is absent or not found.
 * - `multipartParts` (optional): currently only logged; no completion request
 *   is issued from here.
 *
 * @param {object} context - Handler context providing `request`, `env` and `waitUntil`.
 * @returns {Promise<Response>} JSON response: 200 with `{ success, src, fileUrl, fullId }`,
 *   400 for a malformed/incomplete request or missing channel configuration,
 *   401 when `env.AUTH_CODE` is set and the `authcode` header does not match,
 *   500 with `{ error }` for any other failure.
 */
export async function onRequestPost(context) {
  const { request, env, waitUntil } = context;

  try {
    // Verify the auth code (only enforced when AUTH_CODE is configured).
    const authCode = request.headers.get('authcode');
    if (env.AUTH_CODE && authCode !== env.AUTH_CODE) {
      return jsonResponse({ error: 'Unauthorized' }, 401);
    }

    // A body that is not valid JSON is a client error, not a server error.
    let body;
    try {
      body = await request.json();
    } catch (parseError) {
      console.warn('commitUpload received invalid JSON:', parseError.message);
      return jsonResponse({ error: 'Invalid JSON body' }, 400);
    }
    // `null` and primitives are valid JSON but cannot carry the expected fields.
    if (body === null || typeof body !== 'object') {
      return jsonResponse({ error: 'Invalid JSON body' }, 400);
    }

    const { fullId, filePath, sha256, fileSize, fileName, channelName, multipartParts } = body;

    if (!fullId || !filePath || !sha256 || !fileSize) {
      return jsonResponse({
        error: 'Missing required fields: fullId, filePath, sha256, fileSize'
      }, 400);
    }

    // Reject sizes that would otherwise be stored as "NaN" in the metadata.
    const sizeInBytes = Number(fileSize);
    if (!Number.isFinite(sizeInBytes) || sizeInBytes <= 0) {
      return jsonResponse({
        error: 'Invalid fileSize: expected a positive number of bytes'
      }, 400);
    }

    // Load the HuggingFace configuration.
    const uploadConfig = await getUploadConfig(env);
    const hfSettings = uploadConfig.huggingface;

    if (!hfSettings || !hfSettings.channels || hfSettings.channels.length === 0) {
      return jsonResponse({ error: 'No HuggingFace channel configured' }, 400);
    }

    // Select the channel: the requested one if it exists, otherwise the first.
    let hfChannel;
    if (channelName) {
      hfChannel = hfSettings.channels.find(c => c.name === channelName);
    }
    if (!hfChannel) {
      hfChannel = hfSettings.channels[0];
    }

    if (!hfChannel || !hfChannel.token || !hfChannel.repo) {
      return jsonResponse({ error: 'HuggingFace channel not properly configured' }, 400);
    }

    const huggingfaceAPI = new HuggingFaceAPI(hfChannel.token, hfChannel.repo, hfChannel.isPrivate || false);

    // When multipart parts are supplied the multipart upload would need to be
    // completed first.
    if (multipartParts && multipartParts.length > 0) {
      console.log('Completing multipart upload...');
      // multipartParts format: [{ partNumber, etag, completionUrl }]
      // NOTE(review): no multipart-complete request is made here; the code
      // relies on the frontend having finished all part uploads, so only the
      // commit below is performed. Confirm the frontend also completes the
      // multipart upload.
    }

    // Commit the LFS file reference.
    console.log('Committing LFS file...');
    const commitResult = await huggingfaceAPI.commitLfsFile(
      filePath,
      sha256,
      sizeInBytes,
      `Upload ${fileName || fullId}`
    );
    console.log('Commit result:', JSON.stringify(commitResult));

    // Build the file URL.
    const fileUrl = `https://huggingface.co/datasets/${hfChannel.repo}/resolve/main/${filePath}`;

    // Build the metadata stored alongside the record (FileSize is in MB).
    const metadata = {
      FileName: fileName || fullId,
      Channel: "HuggingFace",
      ChannelName: hfChannel.name || "HuggingFace_env",
      FileSize: (sizeInBytes / 1024 / 1024).toFixed(2),
      HfRepo: hfChannel.repo,
      HfFilePath: filePath,
      HfToken: hfChannel.token,
      HfIsPrivate: hfChannel.isPrivate || false,
      HfFileUrl: fileUrl,
      TimeStamp: Date.now(),
      Label: "None"
    };

    // Content moderation (public repos only). Best effort: a moderation
    // failure is logged and the upload continues with Label "None".
    if (!hfChannel.isPrivate) {
      try {
        metadata.Label = await moderateContent(env, fileUrl);
      } catch (e) {
        console.warn('Content moderation failed:', e.message);
      }
    }

    // Write the record to the database.
    const db = getDatabase(env);
    await db.put(fullId, "", { metadata });

    // Finish the upload (index update etc.) without delaying the response.
    const uploadContext = {
      env,
      waitUntil,
      uploadConfig
    };
    waitUntil(endUpload(uploadContext, fullId, metadata));

    // Return the success response.
    const returnLink = `/file/${fullId}`;
    return jsonResponse({
      success: true,
      src: returnLink,
      fileUrl,
      fullId
    }, 200);
  } catch (error) {
    console.error('commitUpload error:', error.message);
    return jsonResponse({ error: error.message }, 500);
  }
}