feat: add file deduplication and hash checking for uploads

- Implemented SHA-256 hashing for uploaded files to enable deduplication.
- Added CheckHash method to verify if a file with the same hash already exists.
- Updated Upload method to reuse existing media assets if a duplicate is found.
- Introduced a new hash column in the media_assets table to store file hashes.
- Enhanced the upload process to include progress tracking and hash calculation.
- Modified frontend to check for existing files before uploading and to show upload progress.
- Added vuedraggable for drag-and-drop functionality in the content editing view.
This commit is contained in:
2025-12-31 19:16:02 +08:00
parent f560b95ec0
commit 221b068a84
13 changed files with 414 additions and 184 deletions

View File

@@ -35,7 +35,7 @@ func (c *Common) Upload(
if form != nil {
val = form.Type
}
return services.Common.Upload(ctx.Context(), user.ID, file, val)
return services.Common.Upload(ctx, user.ID, file, val)
}
// Get options (enums)
@@ -50,3 +50,19 @@ func (c *Common) Upload(
func (c *Common) GetOptions(ctx fiber.Ctx) (*dto.OptionsResponse, error) {
return services.Common.Options(ctx)
}
// CheckHash looks up a previously stored file by its hash on behalf of the
// authenticated user. The lookup itself is delegated to services.Common.CheckHash.
//
// @Router /v1/upload/check [get]
// @Summary Check hash
// @Description Check if file hash exists
// @Tags Common
// @Accept json
// @Produce json
// @Param hash query string true "File Hash"
// @Success 200 {object} dto.UploadResult
// @Bind user local key(__ctx_user)
// @Bind hash query
func (c *Common) CheckHash(ctx fiber.Ctx, user *models.User, hash string) (*dto.UploadResult, error) {
	uploaderID := user.ID
	result, err := services.Common.CheckHash(ctx, uploaderID, hash)
	return result, err
}

View File

@@ -54,6 +54,12 @@ func (r *Routes) Register(router fiber.Router) {
router.Get("/v1/common/options"[len(r.Path()):], DataFunc0(
r.common.GetOptions,
))
r.log.Debugf("Registering route: Get /v1/upload/check -> common.CheckHash")
router.Get("/v1/upload/check"[len(r.Path()):], DataFunc2(
r.common.CheckHash,
Local[*models.User]("__ctx_user"),
QueryParam[string]("hash"),
))
r.log.Debugf("Registering route: Post /v1/upload -> common.Upload")
router.Post("/v1/upload"[len(r.Path()):], DataFunc3(
r.common.Upload,
@@ -106,18 +112,18 @@ func (r *Routes) Register(router fiber.Router) {
Local[*models.User]("__ctx_user"),
QueryParam[string]("id"),
))
r.log.Debugf("Registering route: Get /v1/creator/contents/:id -> creator.GetContent")
router.Get("/v1/creator/contents/:id"[len(r.Path()):], DataFunc2(
r.creator.GetContent,
Local[*models.User]("__ctx_user"),
PathParam[string]("id"),
))
r.log.Debugf("Registering route: Get /v1/creator/contents -> creator.ListContents")
router.Get("/v1/creator/contents"[len(r.Path()):], DataFunc2(
r.creator.ListContents,
Local[*models.User]("__ctx_user"),
Query[dto.CreatorContentListFilter]("filter"),
))
r.log.Debugf("Registering route: Get /v1/creator/contents/:id -> creator.GetContent")
router.Get("/v1/creator/contents/:id"[len(r.Path()):], DataFunc2(
r.creator.GetContent,
Local[*models.User]("__ctx_user"),
PathParam[string]("id"),
))
r.log.Debugf("Registering route: Get /v1/creator/dashboard -> creator.Dashboard")
router.Get("/v1/creator/dashboard"[len(r.Path()):], DataFunc1(
r.creator.Dashboard,

View File

@@ -2,6 +2,8 @@ package services
import (
"context"
"crypto/sha256"
"encoding/hex"
"io"
"mime/multipart"
"os"
@@ -41,8 +43,54 @@ func (s *common) Options(ctx context.Context) (*common_dto.OptionsResponse, erro
}, nil
}
func (s *common) Upload(ctx context.Context, userID int64, file *multipart.FileHeader, typeArg string) (*common_dto.UploadResult, error) {
// Mock Upload to S3/MinIO (Here we just generate key, actual upload handling via direct upload or stream is better)
// CheckHash looks up a stored file by its SHA-256 hex digest so that a client
// can skip uploading content the server already has.
//
// Results:
//   - hash is not a 64-character hex SHA-256 digest: (nil, nil), no query is run.
//   - no asset with this hash is found, or the lookup fails: (nil, nil), which
//     the caller treats as "proceed to upload".
//   - userID already owns an asset with this hash: that asset is returned.
//   - only another user's asset matches: a new media asset row is created for
//     userID that reuses the existing provider, bucket, object key and meta.
//
// A database error is returned only when creating the new row fails.
func (s *common) CheckHash(ctx context.Context, userID int64, hash string) (*common_dto.UploadResult, error) {
	// Rows created before the hash column existed carry the column default
	// (an empty string). An empty or malformed hash must therefore never reach
	// the query: it could match an unrelated asset and link it to this user.
	if digest, err := hex.DecodeString(hash); err != nil || len(digest) != sha256.Size {
		return nil, nil
	}

	existing, err := models.MediaAssetQuery.WithContext(ctx).Where(models.MediaAssetQuery.Hash.Eq(hash)).First()
	if err != nil {
		// NOTE(review): every lookup error is treated as "not found", so a
		// database failure only costs a redundant upload — confirm this is intended.
		return nil, nil // Not found, proceed to upload
	}

	// Found existing file (global deduplication hit).
	// Check whether this user already has it (logical deduplication hit).
	myExisting, err := models.MediaAssetQuery.WithContext(ctx).
		Where(models.MediaAssetQuery.Hash.Eq(hash), models.MediaAssetQuery.UserID.Eq(userID)).
		First()
	if err == nil {
		return s.composeUploadResult(myExisting), nil
	}

	// The tenant lookup is best-effort: when it fails the asset is stored with
	// tenant ID 0.
	var tid int64
	if t, err := models.TenantQuery.WithContext(ctx).Where(models.TenantQuery.UserID.Eq(userID)).First(); err == nil {
		tid = t.ID
	}

	// Create a new record for this user that reuses the existing ObjectKey.
	asset := &models.MediaAsset{
		TenantID:  tid,
		UserID:    userID,
		Type:      existing.Type,
		Status:    consts.MediaAssetStatusUploaded,
		Provider:  existing.Provider,
		Bucket:    existing.Bucket,
		ObjectKey: existing.ObjectKey,
		Hash:      hash,
		Meta:      existing.Meta,
	}
	if err := models.MediaAssetQuery.WithContext(ctx).Create(asset); err != nil {
		return nil, errorx.ErrDatabaseError.WithCause(err)
	}
	return s.composeUploadResult(asset), nil
}
func (s *common) Upload(
ctx context.Context,
userID int64,
file *multipart.FileHeader,
typeArg string,
) (*common_dto.UploadResult, error) { // Mock Upload to S3/MinIO (Here we just generate key, actual upload handling via direct upload or stream is better)
// But this Upload endpoint accepts file. So we save it.
objectKey := uuid.NewString() + "_" + file.Filename
@@ -60,7 +108,7 @@ func (s *common) Upload(ctx context.Context, userID int64, file *multipart.FileH
}
dstPath := filepath.Join(localPath, objectKey)
if err := os.MkdirAll(filepath.Dir(dstPath), 0755); err != nil {
if err := os.MkdirAll(filepath.Dir(dstPath), 0o755); err != nil {
return nil, errorx.ErrInternalError.WithCause(err).WithMsg("failed to create storage directory")
}
@@ -68,45 +116,90 @@ func (s *common) Upload(ctx context.Context, userID int64, file *multipart.FileH
if err != nil {
return nil, errorx.ErrInternalError.WithCause(err).WithMsg("failed to create destination file")
}
defer dst.Close()
if _, err = io.Copy(dst, src); err != nil {
// Hash calculation while copying
hasher := sha256.New()
size, err := io.Copy(io.MultiWriter(dst, hasher), src)
dst.Close() // Close immediately to allow removal if needed
if err != nil {
return nil, errorx.ErrInternalError.WithCause(err).WithMsg("failed to save file content")
}
url := s.GetAssetURL(objectKey)
hash := hex.EncodeToString(hasher.Sum(nil))
// ... rest ...
t, err := models.TenantQuery.WithContext(ctx).Where(models.TenantQuery.UserID.Eq(userID)).First()
var tid int64 = 0
if err == nil {
tid = t.ID
}
asset := &models.MediaAsset{
TenantID: tid,
UserID: userID,
Type: consts.MediaAssetType(typeArg),
Status: consts.MediaAssetStatusUploaded,
Provider: "local",
Bucket: "default",
ObjectKey: objectKey,
Meta: types.NewJSONType(fields.MediaAssetMeta{
Size: file.Size,
}),
var asset *models.MediaAsset
// Deduplication Check
existing, err := models.MediaAssetQuery.WithContext(ctx).Where(models.MediaAssetQuery.Hash.Eq(hash)).First()
if err == nil {
// Found existing file (Storage Deduplication)
os.Remove(dstPath) // Delete the duplicate we just wrote
// Check if user already has it (Logic Deduplication)
myExisting, err := models.MediaAssetQuery.WithContext(ctx).
Where(models.MediaAssetQuery.Hash.Eq(hash), models.MediaAssetQuery.UserID.Eq(userID)).
First()
if err == nil {
return s.composeUploadResult(myExisting), nil
}
// Create new link for user reusing existing ObjectKey
asset = &models.MediaAsset{
TenantID: tid,
UserID: userID,
Type: consts.MediaAssetType(typeArg),
Status: consts.MediaAssetStatusUploaded,
Provider: existing.Provider,
Bucket: existing.Bucket,
ObjectKey: existing.ObjectKey, // Reuse key
Hash: hash,
Meta: existing.Meta,
}
} else {
// New unique file
asset = &models.MediaAsset{
TenantID: tid,
UserID: userID,
Type: consts.MediaAssetType(typeArg),
Status: consts.MediaAssetStatusUploaded,
Provider: "local",
Bucket: "default",
ObjectKey: objectKey,
Hash: hash,
Meta: types.NewJSONType(fields.MediaAssetMeta{
Size: size,
}),
}
}
if err := models.MediaAssetQuery.WithContext(ctx).Create(asset); err != nil {
return nil, errorx.ErrDatabaseError.WithCause(err)
}
return s.composeUploadResult(asset), nil
}
func (s *common) composeUploadResult(asset *models.MediaAsset) *common_dto.UploadResult {
url := s.GetAssetURL(asset.ObjectKey)
filename := filepath.Base(asset.ObjectKey)
// Try to get original filename if stored? Currently objectKey has UUID prefix.
// We can store original filename in Meta if needed. For now, just return valid result.
// Meta is JSONType wrapper.
size := asset.Meta.Data().Size
return &common_dto.UploadResult{
ID: cast.ToString(asset.ID),
URL: url,
Filename: file.Filename,
Size: file.Size,
MimeType: file.Header.Get("Content-Type"),
}, nil
Filename: filename,
Size: size,
MimeType: "application/octet-stream", // TODO: Store mime type in DB
}
}
func (s *common) GetAssetURL(objectKey string) string {

View File

@@ -0,0 +1,8 @@
-- +goose Up
-- Add a hash column to media_assets so uploads can be matched by content hash.
-- Rows that already exist receive the default empty string.
ALTER TABLE media_assets ADD COLUMN hash VARCHAR(64) DEFAULT '';
-- Non-unique index: several rows may share one hash.
CREATE INDEX idx_media_assets_hash ON media_assets (hash);
COMMENT ON COLUMN media_assets.hash IS 'File SHA-256 hash';
-- +goose Down
-- Reverse the Up steps: drop the index, then the column.
DROP INDEX idx_media_assets_hash;
ALTER TABLE media_assets DROP COLUMN hash;

View File

@@ -39,9 +39,9 @@ type Content struct {
UpdatedAt time.Time `gorm:"column:updated_at;type:timestamp with time zone;default:now()" json:"updated_at"`
DeletedAt gorm.DeletedAt `gorm:"column:deleted_at;type:timestamp with time zone" json:"deleted_at"`
Key string `gorm:"column:key;type:character varying(32);comment:Musical key/tone" json:"key"` // Musical key/tone
Author *User `gorm:"foreignKey:UserID;references:ID" json:"author,omitempty"`
ContentAssets []*ContentAsset `gorm:"foreignKey:ContentID;references:ID" json:"content_assets,omitempty"`
Comments []*Comment `gorm:"foreignKey:ContentID;references:ID" json:"comments,omitempty"`
Author *User `gorm:"foreignKey:UserID;references:ID" json:"author,omitempty"`
}
// Quick operations without importing query package

View File

@@ -45,12 +45,6 @@ func newContent(db *gorm.DB, opts ...gen.DOOption) contentQuery {
_contentQuery.UpdatedAt = field.NewTime(tableName, "updated_at")
_contentQuery.DeletedAt = field.NewField(tableName, "deleted_at")
_contentQuery.Key = field.NewString(tableName, "key")
_contentQuery.Author = contentQueryBelongsToAuthor{
db: db.Session(&gorm.Session{}),
RelationField: field.NewRelation("Author", "User"),
}
_contentQuery.ContentAssets = contentQueryHasManyContentAssets{
db: db.Session(&gorm.Session{}),
@@ -63,6 +57,12 @@ func newContent(db *gorm.DB, opts ...gen.DOOption) contentQuery {
RelationField: field.NewRelation("Comments", "Comment"),
}
_contentQuery.Author = contentQueryBelongsToAuthor{
db: db.Session(&gorm.Session{}),
RelationField: field.NewRelation("Author", "User"),
}
_contentQuery.fillFieldMap()
return _contentQuery
@@ -92,12 +92,12 @@ type contentQuery struct {
UpdatedAt field.Time
DeletedAt field.Field
Key field.String // Musical key/tone
Author contentQueryBelongsToAuthor
ContentAssets contentQueryHasManyContentAssets
ContentAssets contentQueryHasManyContentAssets
Comments contentQueryHasManyComments
Author contentQueryBelongsToAuthor
fieldMap map[string]field.Expr
}
@@ -191,104 +191,23 @@ func (c *contentQuery) fillFieldMap() {
func (c contentQuery) clone(db *gorm.DB) contentQuery {
c.contentQueryDo.ReplaceConnPool(db.Statement.ConnPool)
c.Author.db = db.Session(&gorm.Session{Initialized: true})
c.Author.db.Statement.ConnPool = db.Statement.ConnPool
c.ContentAssets.db = db.Session(&gorm.Session{Initialized: true})
c.ContentAssets.db.Statement.ConnPool = db.Statement.ConnPool
c.Comments.db = db.Session(&gorm.Session{Initialized: true})
c.Comments.db.Statement.ConnPool = db.Statement.ConnPool
c.Author.db = db.Session(&gorm.Session{Initialized: true})
c.Author.db.Statement.ConnPool = db.Statement.ConnPool
return c
}
func (c contentQuery) replaceDB(db *gorm.DB) contentQuery {
c.contentQueryDo.ReplaceDB(db)
c.Author.db = db.Session(&gorm.Session{})
c.ContentAssets.db = db.Session(&gorm.Session{})
c.Comments.db = db.Session(&gorm.Session{})
c.Author.db = db.Session(&gorm.Session{})
return c
}
type contentQueryBelongsToAuthor struct {
db *gorm.DB
field.RelationField
}
func (a contentQueryBelongsToAuthor) Where(conds ...field.Expr) *contentQueryBelongsToAuthor {
if len(conds) == 0 {
return &a
}
exprs := make([]clause.Expression, 0, len(conds))
for _, cond := range conds {
exprs = append(exprs, cond.BeCond().(clause.Expression))
}
a.db = a.db.Clauses(clause.Where{Exprs: exprs})
return &a
}
func (a contentQueryBelongsToAuthor) WithContext(ctx context.Context) *contentQueryBelongsToAuthor {
a.db = a.db.WithContext(ctx)
return &a
}
func (a contentQueryBelongsToAuthor) Session(session *gorm.Session) *contentQueryBelongsToAuthor {
a.db = a.db.Session(session)
return &a
}
func (a contentQueryBelongsToAuthor) Model(m *Content) *contentQueryBelongsToAuthorTx {
return &contentQueryBelongsToAuthorTx{a.db.Model(m).Association(a.Name())}
}
func (a contentQueryBelongsToAuthor) Unscoped() *contentQueryBelongsToAuthor {
a.db = a.db.Unscoped()
return &a
}
type contentQueryBelongsToAuthorTx struct{ tx *gorm.Association }
func (a contentQueryBelongsToAuthorTx) Find() (result *User, err error) {
return result, a.tx.Find(&result)
}
func (a contentQueryBelongsToAuthorTx) Append(values ...*User) (err error) {
targetValues := make([]interface{}, len(values))
for i, v := range values {
targetValues[i] = v
}
return a.tx.Append(targetValues...)
}
func (a contentQueryBelongsToAuthorTx) Replace(values ...*User) (err error) {
targetValues := make([]interface{}, len(values))
for i, v := range values {
targetValues[i] = v
}
return a.tx.Replace(targetValues...)
}
func (a contentQueryBelongsToAuthorTx) Delete(values ...*User) (err error) {
targetValues := make([]interface{}, len(values))
for i, v := range values {
targetValues[i] = v
}
return a.tx.Delete(targetValues...)
}
func (a contentQueryBelongsToAuthorTx) Clear() error {
return a.tx.Clear()
}
func (a contentQueryBelongsToAuthorTx) Count() int64 {
return a.tx.Count()
}
func (a contentQueryBelongsToAuthorTx) Unscoped() *contentQueryBelongsToAuthorTx {
a.tx = a.tx.Unscoped()
return &a
}
type contentQueryHasManyContentAssets struct {
db *gorm.DB
@@ -451,6 +370,87 @@ func (a contentQueryHasManyCommentsTx) Unscoped() *contentQueryHasManyCommentsTx
return &a
}
// contentQueryBelongsToAuthor is the query handle for the Author relation of
// Content. Every builder method has a value receiver, so it modifies a copy
// and returns a pointer to that copy; the receiver itself is left untouched.
type contentQueryBelongsToAuthor struct {
	db *gorm.DB

	field.RelationField
}

// Where attaches the given conditions as a WHERE clause. With no conditions it
// returns a copy of the receiver without touching the underlying session.
func (a contentQueryBelongsToAuthor) Where(conds ...field.Expr) *contentQueryBelongsToAuthor {
	if n := len(conds); n > 0 {
		filters := make([]clause.Expression, n)
		for i := range conds {
			filters[i] = conds[i].BeCond().(clause.Expression)
		}
		a.db = a.db.Clauses(clause.Where{Exprs: filters})
	}
	return &a
}

// WithContext binds ctx to the underlying session.
func (a contentQueryBelongsToAuthor) WithContext(ctx context.Context) *contentQueryBelongsToAuthor {
	bound := a.db.WithContext(ctx)
	a.db = bound
	return &a
}

// Session derives a new underlying session from the given configuration.
func (a contentQueryBelongsToAuthor) Session(session *gorm.Session) *contentQueryBelongsToAuthor {
	derived := a.db.Session(session)
	a.db = derived
	return &a
}

// Model returns the association handle for this relation on the given Content.
func (a contentQueryBelongsToAuthor) Model(m *Content) *contentQueryBelongsToAuthorTx {
	assoc := a.db.Model(m).Association(a.Name())
	return &contentQueryBelongsToAuthorTx{tx: assoc}
}

// Unscoped switches the underlying session to unscoped mode.
func (a contentQueryBelongsToAuthor) Unscoped() *contentQueryBelongsToAuthor {
	unscoped := a.db.Unscoped()
	a.db = unscoped
	return &a
}
// contentQueryBelongsToAuthorTx wraps the gorm association that backs the
// Author relation of a single Content value.
type contentQueryBelongsToAuthorTx struct{ tx *gorm.Association }

// Find loads the associated author into result.
func (a contentQueryBelongsToAuthorTx) Find() (result *User, err error) {
	// Run the lookup as its own statement. In `return result, f(&result)` the
	// order between reading result and calling f is not specified by the
	// language, so the returned pointer could be the value from before the call.
	err = a.tx.Find(&result)
	return result, err
}

// Append adds the given users to the association.
func (a contentQueryBelongsToAuthorTx) Append(values ...*User) (err error) {
	return a.tx.Append(contentQueryAuthorTargets(values)...)
}

// Replace replaces the current association with the given users.
func (a contentQueryBelongsToAuthorTx) Replace(values ...*User) (err error) {
	return a.tx.Replace(contentQueryAuthorTargets(values)...)
}

// Delete removes the given users from the association.
func (a contentQueryBelongsToAuthorTx) Delete(values ...*User) (err error) {
	return a.tx.Delete(contentQueryAuthorTargets(values)...)
}

// Clear delegates to the underlying association's Clear.
func (a contentQueryBelongsToAuthorTx) Clear() error {
	return a.tx.Clear()
}

// Count delegates to the underlying association's Count.
func (a contentQueryBelongsToAuthorTx) Count() int64 {
	return a.tx.Count()
}

// Unscoped returns a copy of the handle whose association is unscoped.
func (a contentQueryBelongsToAuthorTx) Unscoped() *contentQueryBelongsToAuthorTx {
	a.tx = a.tx.Unscoped()
	return &a
}

// contentQueryAuthorTargets converts users into the []interface{} form that
// the variadic gorm association methods accept.
func contentQueryAuthorTargets(values []*User) []interface{} {
	targets := make([]interface{}, len(values))
	for i, v := range values {
		targets[i] = v
	}
	return targets
}
type contentQueryDo struct{ gen.DO }
func (c contentQueryDo) Debug() *contentQueryDo {

View File

@@ -34,6 +34,7 @@ type MediaAsset struct {
CreatedAt time.Time `gorm:"column:created_at;type:timestamp with time zone;default:now()" json:"created_at"`
UpdatedAt time.Time `gorm:"column:updated_at;type:timestamp with time zone;default:now()" json:"updated_at"`
DeletedAt gorm.DeletedAt `gorm:"column:deleted_at;type:timestamp with time zone" json:"deleted_at"`
Hash string `gorm:"column:hash;type:character varying(64);comment:File SHA-256 hash" json:"hash"` // File SHA-256 hash
}
// Quick operations without importing query package

View File

@@ -39,6 +39,7 @@ func newMediaAsset(db *gorm.DB, opts ...gen.DOOption) mediaAssetQuery {
_mediaAssetQuery.CreatedAt = field.NewTime(tableName, "created_at")
_mediaAssetQuery.UpdatedAt = field.NewTime(tableName, "updated_at")
_mediaAssetQuery.DeletedAt = field.NewField(tableName, "deleted_at")
_mediaAssetQuery.Hash = field.NewString(tableName, "hash")
_mediaAssetQuery.fillFieldMap()
@@ -63,6 +64,7 @@ type mediaAssetQuery struct {
CreatedAt field.Time
UpdatedAt field.Time
DeletedAt field.Field
Hash field.String // File SHA-256 hash
fieldMap map[string]field.Expr
}
@@ -93,6 +95,7 @@ func (m *mediaAssetQuery) updateTableName(table string) *mediaAssetQuery {
m.CreatedAt = field.NewTime(table, "created_at")
m.UpdatedAt = field.NewTime(table, "updated_at")
m.DeletedAt = field.NewField(table, "deleted_at")
m.Hash = field.NewString(table, "hash")
m.fillFieldMap()
@@ -125,7 +128,7 @@ func (m *mediaAssetQuery) GetFieldByName(fieldName string) (field.OrderExpr, boo
}
func (m *mediaAssetQuery) fillFieldMap() {
m.fieldMap = make(map[string]field.Expr, 14)
m.fieldMap = make(map[string]field.Expr, 15)
m.fieldMap["id"] = m.ID
m.fieldMap["tenant_id"] = m.TenantID
m.fieldMap["user_id"] = m.UserID
@@ -140,6 +143,7 @@ func (m *mediaAssetQuery) fillFieldMap() {
m.fieldMap["created_at"] = m.CreatedAt
m.fieldMap["updated_at"] = m.UpdatedAt
m.fieldMap["deleted_at"] = m.DeletedAt
m.fieldMap["hash"] = m.Hash
}
func (m mediaAssetQuery) clone(db *gorm.DB) mediaAssetQuery {

View File

@@ -14,27 +14,27 @@ func TestStorageProvider(t *testing.T) {
// Mock Config to match what we expect in config.toml
// We use a map to simulate how unmarshal might see it, or just use the Config struct directly if we can manual init.
// But provider uses UnmarshalConfig.
// To test properly, we should try to boot the provider or check the logic.
// Let's manually init the Storage struct with the config we expect to be loaded.
cfg := &storage.Config{
Type: "local",
LocalPath: "./storage",
Secret: "your-storage-secret",
BaseURL: "http://localhost:8080/v1/storage",
}
s := &storage.Storage{Config: cfg}
Convey("SignURL should return absolute URL with BaseURL", func() {
key := "test.png"
url, err := s.SignURL("GET", key, 1*time.Hour)
So(err, ShouldBeNil)
// Log for debugging
t.Logf("Generated URL: %s", url)
So(url, ShouldStartWith, "http://localhost:8080/v1/storage/test.png")
So(url, ShouldContainSubstring, "sign=")
})