feat: add file deduplication and hash checking for uploads
- Implemented SHA-256 hashing for uploaded files to enable deduplication.
- Added a CheckHash method to verify whether a file with the same hash already exists.
- Updated the Upload method to reuse existing media assets if a duplicate is found.
- Introduced a new hash column in the media_assets table to store file hashes.
- Enhanced the upload process to include progress tracking and hash calculation.
- Modified the frontend to check for existing files before uploading and to show upload progress.
- Added vuedraggable for drag-and-drop functionality in the content editing view.
This commit is contained in:
@@ -2,6 +2,8 @@ package services
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"io"
|
||||
"mime/multipart"
|
||||
"os"
|
||||
@@ -41,8 +43,54 @@ func (s *common) Options(ctx context.Context) (*common_dto.OptionsResponse, erro
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s *common) Upload(ctx context.Context, userID int64, file *multipart.FileHeader, typeArg string) (*common_dto.UploadResult, error) {
|
||||
// Mock Upload to S3/MinIO (Here we just generate key, actual upload handling via direct upload or stream is better)
|
||||
func (s *common) CheckHash(ctx context.Context, userID int64, hash string) (*common_dto.UploadResult, error) {
|
||||
existing, err := models.MediaAssetQuery.WithContext(ctx).Where(models.MediaAssetQuery.Hash.Eq(hash)).First()
|
||||
if err != nil {
|
||||
return nil, nil // Not found, proceed to upload
|
||||
}
|
||||
|
||||
// Found existing file (Global deduplication hit)
|
||||
|
||||
// Check if user already has it (Logic deduplication hit)
|
||||
myExisting, err := models.MediaAssetQuery.WithContext(ctx).
|
||||
Where(models.MediaAssetQuery.Hash.Eq(hash), models.MediaAssetQuery.UserID.Eq(userID)).
|
||||
First()
|
||||
if err == nil {
|
||||
return s.composeUploadResult(myExisting), nil
|
||||
}
|
||||
|
||||
// Create new record for this user reusing existing ObjectKey
|
||||
t, err := models.TenantQuery.WithContext(ctx).Where(models.TenantQuery.UserID.Eq(userID)).First()
|
||||
var tid int64 = 0
|
||||
if err == nil {
|
||||
tid = t.ID
|
||||
}
|
||||
|
||||
asset := &models.MediaAsset{
|
||||
TenantID: tid,
|
||||
UserID: userID,
|
||||
Type: existing.Type,
|
||||
Status: consts.MediaAssetStatusUploaded,
|
||||
Provider: existing.Provider,
|
||||
Bucket: existing.Bucket,
|
||||
ObjectKey: existing.ObjectKey,
|
||||
Hash: hash,
|
||||
Meta: existing.Meta,
|
||||
}
|
||||
|
||||
if err := models.MediaAssetQuery.WithContext(ctx).Create(asset); err != nil {
|
||||
return nil, errorx.ErrDatabaseError.WithCause(err)
|
||||
}
|
||||
|
||||
return s.composeUploadResult(asset), nil
|
||||
}
|
||||
|
||||
func (s *common) Upload(
|
||||
ctx context.Context,
|
||||
userID int64,
|
||||
file *multipart.FileHeader,
|
||||
typeArg string,
|
||||
) (*common_dto.UploadResult, error) { // Mock Upload to S3/MinIO (Here we just generate key, actual upload handling via direct upload or stream is better)
|
||||
// But this Upload endpoint accepts file. So we save it.
|
||||
|
||||
objectKey := uuid.NewString() + "_" + file.Filename
|
||||
@@ -60,7 +108,7 @@ func (s *common) Upload(ctx context.Context, userID int64, file *multipart.FileH
|
||||
}
|
||||
dstPath := filepath.Join(localPath, objectKey)
|
||||
|
||||
if err := os.MkdirAll(filepath.Dir(dstPath), 0755); err != nil {
|
||||
if err := os.MkdirAll(filepath.Dir(dstPath), 0o755); err != nil {
|
||||
return nil, errorx.ErrInternalError.WithCause(err).WithMsg("failed to create storage directory")
|
||||
}
|
||||
|
||||
@@ -68,45 +116,90 @@ func (s *common) Upload(ctx context.Context, userID int64, file *multipart.FileH
|
||||
if err != nil {
|
||||
return nil, errorx.ErrInternalError.WithCause(err).WithMsg("failed to create destination file")
|
||||
}
|
||||
defer dst.Close()
|
||||
|
||||
if _, err = io.Copy(dst, src); err != nil {
|
||||
// Hash calculation while copying
|
||||
hasher := sha256.New()
|
||||
size, err := io.Copy(io.MultiWriter(dst, hasher), src)
|
||||
dst.Close() // Close immediately to allow removal if needed
|
||||
if err != nil {
|
||||
return nil, errorx.ErrInternalError.WithCause(err).WithMsg("failed to save file content")
|
||||
}
|
||||
|
||||
url := s.GetAssetURL(objectKey)
|
||||
hash := hex.EncodeToString(hasher.Sum(nil))
|
||||
|
||||
// ... rest ...
|
||||
t, err := models.TenantQuery.WithContext(ctx).Where(models.TenantQuery.UserID.Eq(userID)).First()
|
||||
var tid int64 = 0
|
||||
if err == nil {
|
||||
tid = t.ID
|
||||
}
|
||||
|
||||
asset := &models.MediaAsset{
|
||||
TenantID: tid,
|
||||
UserID: userID,
|
||||
Type: consts.MediaAssetType(typeArg),
|
||||
Status: consts.MediaAssetStatusUploaded,
|
||||
Provider: "local",
|
||||
Bucket: "default",
|
||||
ObjectKey: objectKey,
|
||||
Meta: types.NewJSONType(fields.MediaAssetMeta{
|
||||
Size: file.Size,
|
||||
}),
|
||||
var asset *models.MediaAsset
|
||||
|
||||
// Deduplication Check
|
||||
existing, err := models.MediaAssetQuery.WithContext(ctx).Where(models.MediaAssetQuery.Hash.Eq(hash)).First()
|
||||
if err == nil {
|
||||
// Found existing file (Storage Deduplication)
|
||||
os.Remove(dstPath) // Delete the duplicate we just wrote
|
||||
|
||||
// Check if user already has it (Logic Deduplication)
|
||||
myExisting, err := models.MediaAssetQuery.WithContext(ctx).
|
||||
Where(models.MediaAssetQuery.Hash.Eq(hash), models.MediaAssetQuery.UserID.Eq(userID)).
|
||||
First()
|
||||
if err == nil {
|
||||
return s.composeUploadResult(myExisting), nil
|
||||
}
|
||||
|
||||
// Create new link for user reusing existing ObjectKey
|
||||
asset = &models.MediaAsset{
|
||||
TenantID: tid,
|
||||
UserID: userID,
|
||||
Type: consts.MediaAssetType(typeArg),
|
||||
Status: consts.MediaAssetStatusUploaded,
|
||||
Provider: existing.Provider,
|
||||
Bucket: existing.Bucket,
|
||||
ObjectKey: existing.ObjectKey, // Reuse key
|
||||
Hash: hash,
|
||||
Meta: existing.Meta,
|
||||
}
|
||||
} else {
|
||||
// New unique file
|
||||
asset = &models.MediaAsset{
|
||||
TenantID: tid,
|
||||
UserID: userID,
|
||||
Type: consts.MediaAssetType(typeArg),
|
||||
Status: consts.MediaAssetStatusUploaded,
|
||||
Provider: "local",
|
||||
Bucket: "default",
|
||||
ObjectKey: objectKey,
|
||||
Hash: hash,
|
||||
Meta: types.NewJSONType(fields.MediaAssetMeta{
|
||||
Size: size,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
if err := models.MediaAssetQuery.WithContext(ctx).Create(asset); err != nil {
|
||||
return nil, errorx.ErrDatabaseError.WithCause(err)
|
||||
}
|
||||
|
||||
return s.composeUploadResult(asset), nil
|
||||
}
|
||||
|
||||
func (s *common) composeUploadResult(asset *models.MediaAsset) *common_dto.UploadResult {
|
||||
url := s.GetAssetURL(asset.ObjectKey)
|
||||
filename := filepath.Base(asset.ObjectKey)
|
||||
// Try to get original filename if stored? Currently objectKey has UUID prefix.
|
||||
// We can store original filename in Meta if needed. For now, just return valid result.
|
||||
// Meta is JSONType wrapper.
|
||||
size := asset.Meta.Data().Size
|
||||
|
||||
return &common_dto.UploadResult{
|
||||
ID: cast.ToString(asset.ID),
|
||||
URL: url,
|
||||
Filename: file.Filename,
|
||||
Size: file.Size,
|
||||
MimeType: file.Header.Get("Content-Type"),
|
||||
}, nil
|
||||
Filename: filename,
|
||||
Size: size,
|
||||
MimeType: "application/octet-stream", // TODO: Store mime type in DB
|
||||
}
|
||||
}
|
||||
|
||||
func (s *common) GetAssetURL(objectKey string) string {
|
||||
|
||||
Reference in New Issue
Block a user