feat: add file deduplication and hash checking for uploads

- Implemented SHA-256 hashing for uploaded files to enable deduplication.
- Added CheckHash method to verify if a file with the same hash already exists.
- Updated Upload method to reuse existing media assets if a duplicate is found.
- Introduced a new hash column in the media_assets table to store file hashes.
- Enhanced the upload process to include progress tracking and hash calculation.
- Modified frontend to check for existing files before uploading and to show upload progress.
- Added vuedraggable for drag-and-drop functionality in the content editing view.
This commit is contained in:
2025-12-31 19:16:02 +08:00
parent f560b95ec0
commit 221b068a84
13 changed files with 414 additions and 184 deletions

View File

@@ -2,6 +2,8 @@ package services
import (
"context"
"crypto/sha256"
"encoding/hex"
"io"
"mime/multipart"
"os"
@@ -41,8 +43,54 @@ func (s *common) Options(ctx context.Context) (*common_dto.OptionsResponse, erro
}, nil
}
func (s *common) Upload(ctx context.Context, userID int64, file *multipart.FileHeader, typeArg string) (*common_dto.UploadResult, error) {
// Mock Upload to S3/MinIO (Here we just generate key, actual upload handling via direct upload or stream is better)
// CheckHash looks up an already-stored file by its SHA-256 hex digest so that a
// client can skip sending the file content when the bytes are already known.
//
// Results:
//   - (nil, nil): hash is not a well-formed SHA-256 hex digest, or no asset
//     with that hash could be loaded; the caller should do a normal upload.
//   - (result, nil): the user already has an asset with this hash, or a new
//     asset row was created for the user that reuses the existing object key.
//   - (nil, err): inserting the new asset row failed.
//
// NOTE(review): a caller that merely knows a digest receives a record pointing
// at the stored object without proving possession of the bytes — confirm this
// is acceptable for the deployment.
func (s *common) CheckHash(ctx context.Context, userID int64, hash string) (*common_dto.UploadResult, error) {
	// Only a full SHA-256 digest may reach the query. An empty or malformed
	// value could otherwise match rows that have no hash stored (for example
	// rows written before the hash column existed) and link the caller to an
	// unrelated file.
	digest, decodeErr := hex.DecodeString(hash)
	if decodeErr != nil || len(digest) != sha256.Size {
		return nil, nil
	}
	// Upload stores digests via hex.EncodeToString (lowercase); normalise so an
	// uppercase digest from the client still matches.
	hash = hex.EncodeToString(digest)

	existing, err := models.MediaAssetQuery.WithContext(ctx).
		Where(models.MediaAssetQuery.Hash.Eq(hash)).
		First()
	if err != nil {
		// Any lookup failure is treated as "not found" so the client falls
		// back to a regular upload.
		// NOTE(review): this also hides genuine database errors; telling them
		// apart needs the ORM's not-found sentinel.
		return nil, nil
	}

	// Global hit. If this user already owns a row for the hash, return it
	// instead of inserting a duplicate.
	owned, err := models.MediaAssetQuery.WithContext(ctx).
		Where(models.MediaAssetQuery.Hash.Eq(hash), models.MediaAssetQuery.UserID.Eq(userID)).
		First()
	if err == nil {
		return s.composeUploadResult(owned), nil
	}

	// Tenant lookup is best-effort: when it fails the asset is stored with
	// tenant ID 0.
	var tid int64
	if t, err := models.TenantQuery.WithContext(ctx).Where(models.TenantQuery.UserID.Eq(userID)).First(); err == nil {
		tid = t.ID
	}

	// Create a row for this user that points at the already-stored object.
	asset := &models.MediaAsset{
		TenantID:  tid,
		UserID:    userID,
		Type:      existing.Type,
		Status:    consts.MediaAssetStatusUploaded,
		Provider:  existing.Provider,
		Bucket:    existing.Bucket,
		ObjectKey: existing.ObjectKey,
		Hash:      hash,
		Meta:      existing.Meta,
	}
	if err := models.MediaAssetQuery.WithContext(ctx).Create(asset); err != nil {
		return nil, errorx.ErrDatabaseError.WithCause(err)
	}

	return s.composeUploadResult(asset), nil
}
func (s *common) Upload(
ctx context.Context,
userID int64,
file *multipart.FileHeader,
typeArg string,
) (*common_dto.UploadResult, error) { // Mock Upload to S3/MinIO (Here we just generate key, actual upload handling via direct upload or stream is better)
// But this Upload endpoint accepts file. So we save it.
objectKey := uuid.NewString() + "_" + file.Filename
@@ -60,7 +108,7 @@ func (s *common) Upload(ctx context.Context, userID int64, file *multipart.FileH
}
dstPath := filepath.Join(localPath, objectKey)
if err := os.MkdirAll(filepath.Dir(dstPath), 0755); err != nil {
if err := os.MkdirAll(filepath.Dir(dstPath), 0o755); err != nil {
return nil, errorx.ErrInternalError.WithCause(err).WithMsg("failed to create storage directory")
}
@@ -68,45 +116,90 @@ func (s *common) Upload(ctx context.Context, userID int64, file *multipart.FileH
if err != nil {
return nil, errorx.ErrInternalError.WithCause(err).WithMsg("failed to create destination file")
}
defer dst.Close()
if _, err = io.Copy(dst, src); err != nil {
// Hash calculation while copying
hasher := sha256.New()
size, err := io.Copy(io.MultiWriter(dst, hasher), src)
dst.Close() // Close immediately to allow removal if needed
if err != nil {
return nil, errorx.ErrInternalError.WithCause(err).WithMsg("failed to save file content")
}
url := s.GetAssetURL(objectKey)
hash := hex.EncodeToString(hasher.Sum(nil))
// ... rest ...
t, err := models.TenantQuery.WithContext(ctx).Where(models.TenantQuery.UserID.Eq(userID)).First()
var tid int64 = 0
if err == nil {
tid = t.ID
}
asset := &models.MediaAsset{
TenantID: tid,
UserID: userID,
Type: consts.MediaAssetType(typeArg),
Status: consts.MediaAssetStatusUploaded,
Provider: "local",
Bucket: "default",
ObjectKey: objectKey,
Meta: types.NewJSONType(fields.MediaAssetMeta{
Size: file.Size,
}),
var asset *models.MediaAsset
// Deduplication Check
existing, err := models.MediaAssetQuery.WithContext(ctx).Where(models.MediaAssetQuery.Hash.Eq(hash)).First()
if err == nil {
// Found existing file (Storage Deduplication)
os.Remove(dstPath) // Delete the duplicate we just wrote
// Check if user already has it (Logic Deduplication)
myExisting, err := models.MediaAssetQuery.WithContext(ctx).
Where(models.MediaAssetQuery.Hash.Eq(hash), models.MediaAssetQuery.UserID.Eq(userID)).
First()
if err == nil {
return s.composeUploadResult(myExisting), nil
}
// Create new link for user reusing existing ObjectKey
asset = &models.MediaAsset{
TenantID: tid,
UserID: userID,
Type: consts.MediaAssetType(typeArg),
Status: consts.MediaAssetStatusUploaded,
Provider: existing.Provider,
Bucket: existing.Bucket,
ObjectKey: existing.ObjectKey, // Reuse key
Hash: hash,
Meta: existing.Meta,
}
} else {
// New unique file
asset = &models.MediaAsset{
TenantID: tid,
UserID: userID,
Type: consts.MediaAssetType(typeArg),
Status: consts.MediaAssetStatusUploaded,
Provider: "local",
Bucket: "default",
ObjectKey: objectKey,
Hash: hash,
Meta: types.NewJSONType(fields.MediaAssetMeta{
Size: size,
}),
}
}
if err := models.MediaAssetQuery.WithContext(ctx).Create(asset); err != nil {
return nil, errorx.ErrDatabaseError.WithCause(err)
}
return s.composeUploadResult(asset), nil
}
// composeUploadResult converts a MediaAsset into the UploadResult DTO returned
// to clients.
//
// The URL is derived from the asset's object key via GetAssetURL, the ID is the
// asset ID rendered as a string, and the size is read from the asset's Meta.
// Filename is the base name of the object key, not the name the file was
// originally uploaded under, and MimeType is a fixed placeholder.
func (s *common) composeUploadResult(asset *models.MediaAsset) *common_dto.UploadResult {
url := s.GetAssetURL(asset.ObjectKey)
filename := filepath.Base(asset.ObjectKey)
// The original filename is not stored separately; the object key carries a
// UUID prefix, so the value returned here includes that prefix.
// It could be stored in Meta if needed. For now, just return a valid result.
// Meta is a JSONType wrapper; Data() yields the underlying struct.
size := asset.Meta.Data().Size
return &common_dto.UploadResult{
ID: cast.ToString(asset.ID),
URL: url,
Filename: file.Filename,
Size: file.Size,
MimeType: file.Header.Get("Content-Type"),
}, nil
Filename: filename,
Size: size,
MimeType: "application/octet-stream", // TODO: Store mime type in DB
}
}
func (s *common) GetAssetURL(objectKey string) string {