chore: harden production readiness gates and runbooks
This commit is contained in:
@@ -3,7 +3,7 @@ run-name: ${{ gitea.actor }} Build Application
|
||||
on: [push]
|
||||
|
||||
jobs:
|
||||
Build:
|
||||
FrontendChecks:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check out repository code
|
||||
@@ -14,28 +14,73 @@ jobs:
|
||||
with:
|
||||
node-version: "20"
|
||||
|
||||
- name: Install dependencies and build frontend
|
||||
- name: Install portal dependencies
|
||||
run: |
|
||||
cd frontend
|
||||
npm config set registry https://npm.hub.ipao.vip
|
||||
npm install
|
||||
npm run build
|
||||
cd frontend/portal
|
||||
npm ci
|
||||
|
||||
- name: Portal lint (check only)
|
||||
run: npm -C frontend/portal run lint
|
||||
|
||||
- name: Portal build
|
||||
run: npm -C frontend/portal run build
|
||||
|
||||
- name: Install superadmin dependencies
|
||||
run: |
|
||||
cd frontend/superadmin
|
||||
npm ci
|
||||
|
||||
- name: Superadmin lint (check only)
|
||||
run: npm -C frontend/superadmin run lint
|
||||
|
||||
- name: Superadmin build
|
||||
run: npm -C frontend/superadmin run build
|
||||
|
||||
BackendChecks:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: "1.22"
|
||||
|
||||
- name: Configure Go proxy
|
||||
run: |
|
||||
go env -w GOPROXY=https://go.hub.ipao.vip,direct
|
||||
go env -w GONOPROXY='git.ipao.vip'
|
||||
go env -w GONOSUMDB='git.ipao.vip'
|
||||
|
||||
- name: Run backend tests
|
||||
run: |
|
||||
cd backend
|
||||
go test ./...
|
||||
|
||||
- name: Build Go application
|
||||
run: |
|
||||
cd backend
|
||||
mkdir -p build
|
||||
go env -w GOPROXY=https://go.hub.ipao.vip,direct
|
||||
go env -w GONOPROXY='git.ipao.vip'
|
||||
go env -w GONOSUMDB='git.ipao.vip'
|
||||
go mod tidy
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o build/app .
|
||||
|
||||
- name: API smoke (health/readiness)
  run: |
    cd backend
    # Start the server in the background; `timeout` bounds its lifetime so a
    # hung process can never stall the job.
    timeout 45s go run . serve > /tmp/quyun_backend.log 2>&1 &
    APP_PID=$!
    # Always stop the server and print its log on exit, including when a
    # probe fails. `|| true` keeps a clean run green if the server has
    # already exited by the time the trap fires.
    trap 'kill "${APP_PID}" 2>/dev/null || true; cat /tmp/quyun_backend.log' EXIT
    # Poll until the listener is up instead of relying on a fixed sleep,
    # which is either too short (flaky) or wastes time on every run.
    curl -f -sS --retry 15 --retry-delay 1 --retry-connrefused http://127.0.0.1:18080/healthz > /tmp/healthz.out
    # Readiness may lag liveness briefly; retry transient failures a few times.
    curl -f -sS --retry 5 --retry-delay 1 --retry-connrefused http://127.0.0.1:18080/readyz > /tmp/readyz.out
|
||||
|
||||
DockerImage:
|
||||
runs-on: ubuntu-latest
|
||||
needs: [FrontendChecks, BackendChecks]
|
||||
steps:
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Build final Docker image
|
||||
run: |
|
||||
docker login -u ${{ secrets.DOCKER_AF_USERNAME }} -p ${{ secrets.DOCKER_AF_PASSWORD }} docker-af.hub.ipao.vip
|
||||
|
||||
@@ -2,6 +2,7 @@ package http
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const DefaultPrefix = "Http"
|
||||
@@ -60,3 +61,11 @@ func (h *Config) Address() string {
|
||||
|
||||
return fmt.Sprintf("%s:%d", h.Host, h.Port)
|
||||
}
|
||||
|
||||
// HasTLS reports whether both a TLS certificate and a TLS key are configured.
// It returns false for a nil receiver or a nil TLS section, and treats
// whitespace-only Cert or Key values as unset.
func (h *Config) HasTLS() bool {
|
||||
if h == nil || h.TLS == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return strings.TrimSpace(h.TLS.Cert) != "" && strings.TrimSpace(h.TLS.Key) != ""
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package http
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
@@ -9,9 +10,13 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"quyun/v2/app/errorx"
|
||||
"quyun/v2/providers/storage"
|
||||
|
||||
logrus "github.com/sirupsen/logrus"
|
||||
"go.ipao.vip/atom/container"
|
||||
"go.ipao.vip/atom/opt"
|
||||
"go.uber.org/dig"
|
||||
|
||||
"github.com/gofiber/fiber/v3"
|
||||
"github.com/gofiber/fiber/v3/middleware/compress"
|
||||
@@ -22,8 +27,6 @@ import (
|
||||
"github.com/gofiber/fiber/v3/middleware/recover"
|
||||
"github.com/gofiber/fiber/v3/middleware/requestid"
|
||||
"github.com/samber/lo"
|
||||
|
||||
"quyun/v2/app/errorx"
|
||||
)
|
||||
|
||||
func DefaultProvider() container.ProviderContainer {
|
||||
@@ -38,6 +41,8 @@ func DefaultProvider() container.ProviderContainer {
|
||||
type Service struct {
|
||||
conf *Config
|
||||
Engine *fiber.App
|
||||
healthCheck func(context.Context) error
|
||||
readyCheck func(context.Context) error
|
||||
}
|
||||
|
||||
var errTLSCertKeyRequired = errors.New("tls cert and key must be set")
|
||||
@@ -98,7 +103,11 @@ func Provide(opts ...opt.Option) error {
|
||||
return err
|
||||
}
|
||||
|
||||
return container.Container.Provide(func() (*Service, error) {
|
||||
return container.Container.Provide(func(params struct {
|
||||
dig.In
|
||||
DB *sql.DB `optional:"true"`
|
||||
Storage *storage.Storage `optional:"true"`
|
||||
}) (*Service, error) {
|
||||
engine := fiber.New(fiber.Config{
|
||||
StrictRouting: true,
|
||||
CaseSensitive: true,
|
||||
@@ -198,8 +207,14 @@ func Provide(opts ...opt.Option) error {
|
||||
}))
|
||||
}
|
||||
|
||||
engine.Get("/healthz", func(c fiber.Ctx) error { return c.SendStatus(fiber.StatusNoContent) })
|
||||
engine.Get("/readyz", func(c fiber.Ctx) error { return c.SendStatus(fiber.StatusNoContent) })
|
||||
service := &Service{
|
||||
Engine: engine,
|
||||
conf: &config,
|
||||
}
|
||||
service.healthCheck = service.buildHealthCheck()
|
||||
service.readyCheck = service.buildReadyCheck(params.DB, params.Storage)
|
||||
engine.Get("/healthz", service.handleHealthz)
|
||||
engine.Get("/readyz", service.handleReadyz)
|
||||
|
||||
engine.Hooks().OnPostShutdown(func(err error) error {
|
||||
if err != nil {
|
||||
@@ -210,14 +225,72 @@ func Provide(opts ...opt.Option) error {
|
||||
return nil
|
||||
})
|
||||
|
||||
return &Service{
|
||||
Engine: engine,
|
||||
conf: &config,
|
||||
}, nil
|
||||
return service, nil
|
||||
}, o.DiOptions()...)
|
||||
}
|
||||
|
||||
// buildHealthCheck returns the liveness check invoked by the /healthz handler.
// The returned function ignores its context and always reports healthy (nil).
|
||||
func (svc *Service) buildHealthCheck() func(context.Context) error {
|
||||
return func(_ context.Context) error {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// buildReadyCheck assembles the readiness check invoked by the /readyz handler.
// When db is non-nil it wraps db.PingContext in a probe bounded by a 1500ms
// timeout derived from the caller's context; when db is nil no database probe
// is configured. The probe and store are then handed to newReadyCheck.
func (svc *Service) buildReadyCheck(db *sql.DB, store *storage.Storage) func(context.Context) error {
|
||||
var dbPing func(context.Context) error
|
||||
if db != nil {
|
||||
dbPing = func(ctx context.Context) error {
|
||||
// Bound each ping separately so a stalled database cannot hold the probe open.
pingCtx, cancel := context.WithTimeout(ctx, 1500*time.Millisecond)
|
||||
defer cancel()
|
||||
|
||||
return db.PingContext(pingCtx)
|
||||
}
|
||||
}
|
||||
|
||||
return newReadyCheck(dbPing, store)
|
||||
}
|
||||
|
||||
// newReadyCheck returns a readiness function built from an optional database
// probe and an optional storage provider.
//
// The returned function:
//   - runs dbPing when it is non-nil and, on failure, returns
//     errorx.ErrServiceUnavailable carrying the cause and "database not ready";
//   - when store and store.Config are non-nil, Type equals "s3"
//     (case-insensitive, trimmed) and CheckOnBoot is set, requires non-blank
//     Endpoint and Bucket, otherwise returns errorx.ErrServiceUnavailable with
//     "storage not ready";
//   - returns nil otherwise.
//
// NOTE: the storage branch only validates configuration values; it performs no
// request against the storage backend.
func newReadyCheck(dbPing func(context.Context) error, store *storage.Storage) func(context.Context) error {
|
||||
return func(ctx context.Context) error {
|
||||
if dbPing != nil {
|
||||
if err := dbPing(ctx); err != nil {
|
||||
return errorx.ErrServiceUnavailable.WithCause(err).WithMsg("database not ready")
|
||||
}
|
||||
}
|
||||
// Only S3-type storage with CheckOnBoot enabled takes part in readiness.
if store != nil && store.Config != nil && strings.EqualFold(strings.TrimSpace(store.Config.Type), "s3") && store.Config.CheckOnBoot {
|
||||
if strings.TrimSpace(store.Config.Endpoint) == "" || strings.TrimSpace(store.Config.Bucket) == "" {
|
||||
return errorx.ErrServiceUnavailable.WithMsg("storage not ready")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// handleHealthz serves the liveness endpoint. It runs svc.healthCheck (when
// set) under a 2-second timeout; a check error is written to the response via
// errorx.SendError, otherwise the handler replies with 204 No Content.
func (svc *Service) handleHealthz(c fiber.Ctx) error {
|
||||
// The timeout context is rooted at context.Background(), not at the request.
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
if svc.healthCheck != nil {
|
||||
if err := svc.healthCheck(ctx); err != nil {
|
||||
return errorx.SendError(c, err)
|
||||
}
|
||||
}
|
||||
|
||||
return c.SendStatus(fiber.StatusNoContent)
|
||||
}
|
||||
|
||||
// handleReadyz serves the readiness endpoint. It runs svc.readyCheck (when
// set) under a 2-second timeout; a check error is written to the response via
// errorx.SendError, otherwise the handler replies with 204 No Content.
func (svc *Service) handleReadyz(c fiber.Ctx) error {
|
||||
// The timeout context is rooted at context.Background(), not at the request.
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
if svc.readyCheck != nil {
|
||||
if err := svc.readyCheck(ctx); err != nil {
|
||||
return errorx.SendError(c, err)
|
||||
}
|
||||
}
|
||||
|
||||
return c.SendStatus(fiber.StatusNoContent)
|
||||
}
|
||||
|
||||
func buildCORSConfig(c *Cors) *cors.Config {
|
||||
if c == nil {
|
||||
return nil
|
||||
|
||||
52
backend/providers/http/engine_test.go
Normal file
52
backend/providers/http/engine_test.go
Normal file
@@ -0,0 +1,52 @@
|
||||
package http
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
"quyun/v2/providers/storage"
|
||||
)
|
||||
|
||||
// TestNewReadyCheck exercises newReadyCheck with stubbed dependencies: a
// failing database probe, an incomplete S3 configuration with CheckOnBoot
// enabled, and a fully healthy combination of both.
func TestNewReadyCheck(t *testing.T) {
|
||||
// A failing database probe must surface as a readiness error.
t.Run("returns error when database ping fails", func(t *testing.T) {
|
||||
checker := newReadyCheck(func(context.Context) error {
|
||||
return errors.New("db down")
|
||||
}, nil)
|
||||
|
||||
err := checker(context.Background())
|
||||
if err == nil {
|
||||
t.Fatalf("expected readiness error when db ping fails")
|
||||
}
|
||||
})
|
||||
|
||||
// With no database probe, blank Endpoint/Bucket on S3 storage must fail readiness.
t.Run("returns error when s3 storage config is incomplete and check on boot enabled", func(t *testing.T) {
|
||||
checker := newReadyCheck(nil, &storage.Storage{Config: &storage.Config{
|
||||
Type: "s3",
|
||||
CheckOnBoot: true,
|
||||
Endpoint: "",
|
||||
Bucket: "",
|
||||
}})
|
||||
|
||||
err := checker(context.Background())
|
||||
if err == nil {
|
||||
t.Fatalf("expected readiness error when storage config is incomplete")
|
||||
}
|
||||
})
|
||||
|
||||
// A passing probe plus a complete S3 configuration must report ready.
t.Run("returns nil when dependencies are ready", func(t *testing.T) {
|
||||
checker := newReadyCheck(func(context.Context) error {
|
||||
return nil
|
||||
}, &storage.Storage{Config: &storage.Config{
|
||||
Type: "s3",
|
||||
CheckOnBoot: true,
|
||||
Endpoint: "http://127.0.0.1:9000",
|
||||
Bucket: "bucket",
|
||||
}})
|
||||
|
||||
err := checker(context.Background())
|
||||
if err != nil {
|
||||
t.Fatalf("expected nil error, got %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"fmt"
|
||||
"math"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"go.ipao.vip/atom/container"
|
||||
@@ -92,6 +93,8 @@ func (config *Config) checkDefault() {
|
||||
|
||||
if config.SslMode == "" {
|
||||
config.SslMode = "disable"
|
||||
} else {
|
||||
config.SslMode = strings.ToLower(strings.TrimSpace(config.SslMode))
|
||||
}
|
||||
|
||||
if config.TimeZone == "" {
|
||||
@@ -141,3 +144,9 @@ func (config *Config) DSN() string {
|
||||
|
||||
return base + extras
|
||||
}
|
||||
|
||||
func (config *Config) IsTLSEnabled() bool {
|
||||
mode := strings.ToLower(strings.TrimSpace(config.SslMode))
|
||||
|
||||
return mode != "" && mode != "disable"
|
||||
}
|
||||
|
||||
@@ -3,12 +3,16 @@ package postgres
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"math"
|
||||
"time"
|
||||
|
||||
"quyun/v2/providers/app"
|
||||
|
||||
logrus "github.com/sirupsen/logrus"
|
||||
"go.ipao.vip/atom/container"
|
||||
"go.ipao.vip/atom/opt"
|
||||
"go.uber.org/dig"
|
||||
"gorm.io/driver/postgres"
|
||||
"gorm.io/gorm"
|
||||
"gorm.io/gorm/logger"
|
||||
@@ -22,7 +26,13 @@ func Provide(opts ...opt.Option) error {
|
||||
return err
|
||||
}
|
||||
|
||||
return container.Container.Provide(func() (*gorm.DB, *sql.DB, *Config, error) {
|
||||
return container.Container.Provide(func(params struct {
|
||||
dig.In
|
||||
App *app.Config `optional:"true"`
|
||||
}) (*gorm.DB, *sql.DB, *Config, error) {
|
||||
if params.App != nil && params.App.IsReleaseMode() && !conf.IsTLSEnabled() {
|
||||
return nil, nil, nil, fmt.Errorf("release mode requires Database.SslMode to enable TLS")
|
||||
}
|
||||
dbConfig := postgres.Config{DSN: conf.DSN()}
|
||||
|
||||
// 安全日志:不打印密码,仅输出关键连接信息
|
||||
|
||||
168
docs/backup_restore_runbook.md
Normal file
168
docs/backup_restore_runbook.md
Normal file
@@ -0,0 +1,168 @@
|
||||
# Backup / Restore Runbook (Pre-Prod & Prod)
|
||||
|
||||
## 1. Scope
|
||||
|
||||
适用于 `quyun_v2` 的以下状态数据:
|
||||
- PostgreSQL(业务主数据)
|
||||
- 对象存储目录(本地存储或 S3 兼容对象)
|
||||
- 关键运行配置快照(不含明文 secret)
|
||||
|
||||
本 Runbook 目标:
|
||||
1. 能稳定执行备份
|
||||
2. 能在预发环境完成恢复
|
||||
3. 有明确 RTO / RPO 验证步骤
|
||||
|
||||
---
|
||||
|
||||
## 2. Preconditions
|
||||
|
||||
- 拥有数据库备份权限(`pg_dump` / `psql`)
|
||||
- 拥有对象存储读写权限(本地目录或 S3 API)
|
||||
- 预发环境可用并与生产版本兼容
|
||||
- 已确认以下变量(示例):
|
||||
|
||||
```bash
|
||||
export QY_DB_HOST=127.0.0.1
|
||||
export QY_DB_PORT=5432
|
||||
export QY_DB_NAME=quyun_v2
|
||||
export QY_DB_USER=postgres
|
||||
export QY_DB_PASSWORD='***'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. PostgreSQL Backup
|
||||
|
||||
### 3.1 创建备份目录
|
||||
|
||||
```bash
|
||||
mkdir -p /tmp/quyun-backup
|
||||
```
|
||||
|
||||
### 3.2 导出数据库(自定义格式)
|
||||
|
||||
```bash
|
||||
PGPASSWORD="$QY_DB_PASSWORD" \
|
||||
pg_dump -h "$QY_DB_HOST" -p "$QY_DB_PORT" -U "$QY_DB_USER" \
|
||||
-F c -d "$QY_DB_NAME" \
|
||||
-f "/tmp/quyun-backup/${QY_DB_NAME}_$(date +%Y%m%d_%H%M%S).dump"
|
||||
```
|
||||
|
||||
### 3.3 备份完整性校验
|
||||
|
||||
```bash
|
||||
pg_restore -l /tmp/quyun-backup/<backup-file>.dump >/tmp/quyun-backup/restore.list
|
||||
```
|
||||
|
||||
验收标准:命令退出码为 0,且 `restore.list` 非空。
|
||||
|
||||
---
|
||||
|
||||
## 4. Object Storage Backup
|
||||
|
||||
### 4.1 本地存储(`Storage.Type=local`)
|
||||
|
||||
```bash
|
||||
tar -czf "/tmp/quyun-backup/storage_$(date +%Y%m%d_%H%M%S).tar.gz" ./backend/storage
|
||||
```
|
||||
|
||||
### 4.2 S3/MinIO(`Storage.Type=s3`)
|
||||
|
||||
使用 `mc`(MinIO Client)示例:
|
||||
|
||||
```bash
|
||||
mc alias set quyun-s3 http://127.0.0.1:9000 "$STORAGE_ACCESS_KEY" "$STORAGE_SECRET_KEY"
|
||||
mc mirror quyun-s3/quyun-01 "/tmp/quyun-backup/s3_quyun-01_$(date +%Y%m%d_%H%M%S)"
|
||||
```
|
||||
|
||||
验收标准:目标目录文件数量 > 0,且抽样对象可读取。
|
||||
|
||||
---
|
||||
|
||||
## 5. Restore Procedure (Pre-Prod Drill)
|
||||
|
||||
### 5.1 预发库准备
|
||||
|
||||
```bash
|
||||
PGPASSWORD="$QY_DB_PASSWORD" \
|
||||
psql -h "$QY_DB_HOST" -p "$QY_DB_PORT" -U "$QY_DB_USER" -d postgres \
|
||||
-c "DROP DATABASE IF EXISTS ${QY_DB_NAME}_restore;"
|
||||
|
||||
PGPASSWORD="$QY_DB_PASSWORD" \
|
||||
psql -h "$QY_DB_HOST" -p "$QY_DB_PORT" -U "$QY_DB_USER" -d postgres \
|
||||
-c "CREATE DATABASE ${QY_DB_NAME}_restore;"
|
||||
```
|
||||
|
||||
### 5.2 恢复数据库
|
||||
|
||||
```bash
|
||||
PGPASSWORD="$QY_DB_PASSWORD" \
|
||||
pg_restore -h "$QY_DB_HOST" -p "$QY_DB_PORT" -U "$QY_DB_USER" \
|
||||
-d "${QY_DB_NAME}_restore" --clean --if-exists \
|
||||
"/tmp/quyun-backup/<backup-file>.dump"
|
||||
```
|
||||
|
||||
### 5.3 恢复后校验
|
||||
|
||||
```bash
|
||||
PGPASSWORD="$QY_DB_PASSWORD" \
|
||||
psql -h "$QY_DB_HOST" -p "$QY_DB_PORT" -U "$QY_DB_USER" -d "${QY_DB_NAME}_restore" \
|
||||
-c "SELECT COUNT(*) FROM users;"
|
||||
|
||||
PGPASSWORD="$QY_DB_PASSWORD" \
|
||||
psql -h "$QY_DB_HOST" -p "$QY_DB_PORT" -U "$QY_DB_USER" -d "${QY_DB_NAME}_restore" \
|
||||
-c "SELECT COUNT(*) FROM audit_logs;"
|
||||
```
|
||||
|
||||
验收标准:
|
||||
- 核心表(`users`, `orders`, `audit_logs`, `contents`)有合理数据量
|
||||
- 抽样业务查询无语法或权限错误
|
||||
|
||||
---
|
||||
|
||||
## 6. Service Verification After Restore
|
||||
|
||||
启动服务后执行:
|
||||
|
||||
```bash
|
||||
curl -f -sS http://127.0.0.1:18080/healthz
|
||||
curl -f -sS http://127.0.0.1:18080/readyz
|
||||
```
|
||||
|
||||
验收标准:两个端点均返回 2xx。
|
||||
|
||||
---
|
||||
|
||||
## 7. RTO / RPO Recording
|
||||
|
||||
每次演练记录:
|
||||
- Backup start/end time
|
||||
- Restore start/end time
|
||||
- Data validation result
|
||||
- Incident / blockers
|
||||
|
||||
建议目标:
|
||||
- RTO <= 30 分钟
|
||||
- RPO <= 24 小时(按日备份基线)
|
||||
|
||||
---
|
||||
|
||||
## 8. Failure Handling
|
||||
|
||||
- `pg_dump` 失败:检查网络/权限/磁盘空间,重试一次
|
||||
- `pg_restore` 失败:保留日志,回退至原预发库,不进行覆盖发布
|
||||
- 对象恢复失败:仅允许在“非阻断业务路径”条件下继续演练,否则中止
|
||||
|
||||
---
|
||||
|
||||
## 9. Evidence Requirement
|
||||
|
||||
每次演练需归档到:
|
||||
- `docs/release-evidence/<date>.md`
|
||||
|
||||
最少包含:
|
||||
1. 执行人、时间窗
|
||||
2. 命令与退出码
|
||||
3. 核心校验 SQL 输出
|
||||
4. healthz/readyz 结果
|
||||
5. 结论(PASS/FAIL)
|
||||
297
docs/plan.md
297
docs/plan.md
@@ -1,297 +0,0 @@
|
||||
# Implementation Plan: 生产部署能力补齐(数据源统一、隔离强化、双侧审计)
|
||||
|
||||
**Branch**: `[prod-readiness-hardening]` | **Date**: 2026-02-08 | **Spec**: 会话需求(评估并补齐生产部署能力)
|
||||
**Input**: 用户要求按 4 项标准补齐生产部署能力:
|
||||
1) 前端数据来源后端接口/渲染;
|
||||
2) 用户/租户数据隔离完备;
|
||||
3) 超级管理员后台可审计;
|
||||
4) 租户管理侧可审计。
|
||||
|
||||
## Summary
|
||||
|
||||
当前评估结果为:
|
||||
- #1 前端数据来源:未完全达标(存在硬编码业务数据页面);
|
||||
- #2 用户/租户隔离:基础机制已具备,但依赖服务层手工加租户条件,仍有遗漏风险;
|
||||
- #3 超管审计:已具备(audit_logs 表 + 超管查询页面/API);
|
||||
- #4 租户侧审计:未达标(仅有 audit 类通知,不等同审计日志查询能力)。
|
||||
|
||||
本计划目标是在最小风险下将以上 4 项全部提升为可上线状态:
|
||||
- 去除生产路径硬编码业务数据;
|
||||
- 强化多租户隔离“可证明性”(代码约束 + 负向测试);
|
||||
- 保持并补强超管审计覆盖;
|
||||
- 增加租户管理侧可审计能力(租户范围审计日志查询 API + 页面);
|
||||
- 建立可审签发布证据链(route/swagger/test/UI flow/evidence/archive)。
|
||||
|
||||
## Technical Context
|
||||
|
||||
**Language/Version**:
|
||||
- Backend: Go(Fiber + GORM-Gen)
|
||||
- Frontend: Vue 3 + Vite(portal/superadmin)
|
||||
|
||||
**Primary Dependencies**:
|
||||
- Backend: `backend/app/http/v1`, `backend/app/http/super/v1`, `backend/app/services/*`, `backend/database/models/*`
|
||||
- Frontend: `frontend/portal/src/views/*`, `frontend/portal/src/api/*`, `frontend/superadmin/src/views/superadmin/*`, `frontend/superadmin/src/service/*`
|
||||
- Generators: `atomctl gen route`, `atomctl swag init`
|
||||
|
||||
**Storage**:
|
||||
- PostgreSQL(`audit_logs` 已存在,必要时补索引/查询优化)
|
||||
|
||||
**Testing**:
|
||||
- Backend: `cd backend && env GOCACHE=$PWD/.gocache GOTMPDIR=$PWD/.gotmp go test ./...`
|
||||
- Frontend Build/Lint:
|
||||
- `npm -C frontend/portal run build`
|
||||
- `npm -C frontend/portal run lint`
|
||||
- `npm -C frontend/superadmin run build`
|
||||
- `npm -C frontend/superadmin run lint`
|
||||
- Frontend功能验收(页面流):
|
||||
- 超管审计日志查询流
|
||||
- 租户管理侧审计日志查询流
|
||||
- 用户“我的点赞/收藏/订单”等后端数据回填流
|
||||
|
||||
**Target Platform**:
|
||||
- Linux 部署环境(Web + API)
|
||||
|
||||
**Project Type**:
|
||||
- Web application(frontend + backend)
|
||||
|
||||
**Performance Goals**:
|
||||
- 审计日志列表接口在常规分页(20条)下 p95 <= 300ms(预发基准)
|
||||
- 不引入明显回归(核心列表接口不劣化)
|
||||
|
||||
**Performance Measurement Protocol**:
|
||||
- 测量接口:`/super/v1/audit-logs` 与新增租户审计列表接口。
|
||||
- 测量条件:`page=1&limit=20`,默认排序(`created_at desc`),不带 keyword。
|
||||
- 样本规则:预热 10 次后连续采样 50 次,统计 p95。
|
||||
- 证据输出:写入 `docs/release-evidence/<date>.md` 的“性能基线”小节。
|
||||
|
||||
**Constraints**:
|
||||
- 禁止手改 `*.gen.go`(路由/文档生成文件仅通过工具生成)
|
||||
- 控制器保持薄层(bind -> services -> return)
|
||||
- 不使用 `as any` / `@ts-ignore` / `@ts-expect-error`
|
||||
- 缺陷修复最小化,不做无关重构
|
||||
|
||||
**Scale/Scope**:
|
||||
- 覆盖 portal + superadmin + backend v1/super v1 审计与隔离相关模块
|
||||
- 覆盖上线阻塞项(P0)与并行优化项(P1)
|
||||
|
||||
## Constitution Check
|
||||
|
||||
*GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.*
|
||||
|
||||
- ✅ 符合后端分层:Controller 不直接做 DAO CRUD,统一走 `services.*`
|
||||
- ✅ 符合生成文件约束:路由与 Swagger 仅通过 `atomctl` 生成
|
||||
- ✅ 符合多租户前缀规范:`/v1/t/:tenantCode/*`
|
||||
- ✅ 覆盖前后端联调验收要求:前端页面流 + `go test ./...`
|
||||
- ✅ 计划包含任务拆解、依赖、验收标准、风险项、责任归属与里程碑
|
||||
|
||||
## Project Structure
|
||||
|
||||
### Documentation (this phase)
|
||||
|
||||
```text
|
||||
docs/
|
||||
├── plan.md # 当前执行计划(本文件)
|
||||
├── release-evidence/<date>.md # 验收证据模板与执行结果
|
||||
└── plans/<date>.md # 阶段完成归档
|
||||
```
|
||||
|
||||
### Source Code (repository root)
|
||||
|
||||
```text
|
||||
backend/
|
||||
├── app/http/v1/
|
||||
│ ├── routes.manual.go
|
||||
│ ├── routes.gen.go # 生成文件(勿手改)
|
||||
│ ├── user.go
|
||||
│ ├── creator.go
|
||||
│ └── (new) audit_logs.go # 若新增租户侧审计接口
|
||||
├── app/http/super/v1/
|
||||
│ ├── audit_logs.go
|
||||
│ ├── routes.manual.go
|
||||
│ └── routes.gen.go # 生成文件(勿手改)
|
||||
├── app/services/
|
||||
│ ├── audit.go
|
||||
│ ├── order.go
|
||||
│ ├── content.go
|
||||
│ ├── coupon.go
|
||||
│ ├── tenant*.go
|
||||
│ └── super.go
|
||||
└── docs/
|
||||
├── docs.go # 生成文件(勿手改)
|
||||
├── swagger.yaml # 生成文件(勿手改)
|
||||
└── swagger.json # 生成文件(勿手改)
|
||||
|
||||
frontend/
|
||||
├── portal/
|
||||
│ └── src/
|
||||
│ ├── api/
|
||||
│ ├── router/index.js
|
||||
│ └── views/
|
||||
└── superadmin/
|
||||
└── src/
|
||||
├── router/index.js
|
||||
├── service/
|
||||
└── views/superadmin/
|
||||
```
|
||||
|
||||
**Structure Decision**: 在现有目录内增量修复,不新增独立子项目;优先复用现有 service 与 DTO,确保变更可控。
|
||||
|
||||
## Plan Phases
|
||||
|
||||
### Phase 1 — 前端数据源统一(P0)
|
||||
目标:满足“前端业务数据必须来自后端接口/渲染”。
|
||||
- 基于 router 生成“生产页面 -> 数据来源(API/mock/static)”清单。
|
||||
- 替换生产路径中的硬编码业务数据(示例:Portal 用户侧业务列表)。
|
||||
- 处理 Superadmin demo 数据风险:默认禁入生产菜单/路由(必要时移出生产构建入口)。
|
||||
|
||||
### Phase 2 — 用户/租户隔离强化(P0)
|
||||
目标:把“依赖人工自觉加条件”改为“可验证、可回归”。
|
||||
- 梳理关键服务(order/content/coupon/tenant/wallet)过滤点。
|
||||
- 对高风险路径补足 query-time tenant/user 约束。
|
||||
- 增加跨租户越权负向测试并形成隔离矩阵证据。
|
||||
|
||||
### Phase 3 — 审计能力补齐(超管 + 租户)(P0)
|
||||
目标:满足 #3/#4 审计要求。
|
||||
- 超管侧:盘点并补齐关键操作 `Audit.Log` 覆盖。
|
||||
- 租户侧:新增租户审计查询 API + 页面,附角色权限控制。
|
||||
- 完成 route 生成与 Swagger 生成,确保接口可见且可调试。
|
||||
|
||||
### Phase 4 — 验证、证据与发布门禁(P0)
|
||||
目标:形成可审签的上线证据。
|
||||
- 执行 backend 全量测试与双前端 build/lint。
|
||||
- 执行前端页面流验收并按模板记录证据。
|
||||
- 输出发布门禁检查单(Pass/Fail + 证据路径)。
|
||||
- 阶段通过后归档 `docs/plan.md` 到 `docs/plans/<date>.md`,并清空 `docs/plan.md`。
|
||||
|
||||
## Tasks
|
||||
|
||||
- [ ] T1 建立“生产部署能力差距台账”(含 router->页面->数据来源盘点)。
|
||||
- [ ] T2 替换 Portal 生产路径中的硬编码业务数据来源。
|
||||
- [ ] T3 下线或隔离 Superadmin demo 数据路由(防止进入生产流量路径)。
|
||||
- [ ] T4 建立后端隔离审计清单(order/content/coupon/tenant/wallet)。
|
||||
- [ ] T5 对高风险服务补充 tenant/user query-time 约束(最小改动)。
|
||||
- [ ] T6 增加跨租户越权测试用例并纳入回归。
|
||||
- [ ] T7 盘点超管关键操作审计埋点,补齐缺失 `Audit.Log` 调用。
|
||||
- [ ] T8 设计并实现租户侧审计日志查询 API(租户范围、权限受控)。
|
||||
- [ ] T9 执行 `cd backend && atomctl gen route` 并校验新增路由注册。
|
||||
- [ ] T10 实现租户管理侧审计页面(筛选、分页、时间范围、动作类型)。
|
||||
- [ ] T11 补充审计相关 API 注释与字段定义(operator/action/target/detail)。
|
||||
- [ ] T12 执行 `cd backend && atomctl swag init` 并校验 Swagger 产物更新。
|
||||
- [ ] T13 执行 backend 全量测试 `go test ./...`。
|
||||
- [ ] T14 执行 portal/superadmin build + lint。
|
||||
- [ ] T15 创建并冻结验收证据模板:`docs/release-evidence/<date>.md`。
|
||||
- [ ] T16 执行前端页面流验收并填充证据模板(含截图/日志/结论)。
|
||||
- [ ] T17 汇总发布门禁清单并形成 Go/No-Go 结论。
|
||||
- [ ] T18 阶段完成后归档 `docs/plan.md` 到 `docs/plans/<date>.md`,并清空 `docs/plan.md`。
|
||||
|
||||
## Dependencies
|
||||
|
||||
- T1 -> T2/T3/T4/T7(先冻结缺口,再实施)
|
||||
- T4 -> T5 -> T6 -> T13(隔离设计先行,测试跟进)
|
||||
- T7 -> T8 -> T9 -> T10(先有后端能力与路由,再接前端)
|
||||
- T8 -> T11 -> T12(接口定义后再生成 Swagger)
|
||||
- T2/T3/T10 -> T14/T16(前端改造完成后再构建与页面验收)
|
||||
- T15 -> T16 -> T17(证据模板先行)
|
||||
- T9/T12/T13/T14/T16 -> T17(发布结论依赖完整证据链)
|
||||
- T17(Go) -> T18(仅 Go 才归档;No-Go 不归档)
|
||||
|
||||
## Owner Roles (RACI-lite)
|
||||
|
||||
> 说明:每个任务必须有唯一 DRI(Directly Responsible Individual)与一个 Approver。
|
||||
|
||||
| Role | 主要职责 |
|
||||
|------|----------|
|
||||
| Tech Lead (Overall) | 统筹范围、顺序、风险与里程碑审签 |
|
||||
| Backend Lead | 隔离强化、审计后端能力、生成流程与后端验证 |
|
||||
| Frontend Portal Owner | Portal/租户侧页面改造与联调 |
|
||||
| Frontend Superadmin Owner | Superadmin 生产路径治理与联调 |
|
||||
| QA Owner | 回归用例、页面流验收、证据完整性审查 |
|
||||
| Release Owner | 发布门禁、Go/No-Go 会议与归档执行 |
|
||||
|
||||
## Task Accountability (DRI / Approver)
|
||||
|
||||
| Task | DRI | Approver |
|
||||
|------|-----|----------|
|
||||
| T1 | Tech Lead | QA Owner |
|
||||
| T2 | Frontend Portal Owner | Tech Lead |
|
||||
| T3 | Frontend Superadmin Owner | Tech Lead |
|
||||
| T4 | Backend Lead | Tech Lead |
|
||||
| T5 | Backend Lead | QA Owner |
|
||||
| T6 | QA Owner | Backend Lead |
|
||||
| T7 | Backend Lead | Tech Lead |
|
||||
| T8 | Backend Lead | Tech Lead |
|
||||
| T9 | Backend Lead | Tech Lead |
|
||||
| T10 | Frontend Portal Owner | QA Owner |
|
||||
| T11 | Backend Lead | Tech Lead |
|
||||
| T12 | Backend Lead | Tech Lead |
|
||||
| T13 | Backend Lead | QA Owner |
|
||||
| T14 | Frontend Superadmin Owner | QA Owner |
|
||||
| T15 | QA Owner | Release Owner |
|
||||
| T16 | QA Owner | Release Owner |
|
||||
| T17 | Release Owner | Tech Lead |
|
||||
| T18 | Release Owner | Tech Lead |
|
||||
|
||||
## Execution Schedule (10 Working Days)
|
||||
|
||||
> 计划周期:10个工作日。发生阻塞时顺延,不压缩验证与证据阶段。
|
||||
|
||||
| Day | Focus | 任务 | Primary Owner | 协作方 | 当日退出标准 |
|
||||
|-----|-------|------|---------------|--------|--------------|
|
||||
| D1 | 基线与证据机制冻结 | T1, T4(启动), T15 | Tech Lead, Backend Lead, QA Owner | Release Owner | 差距台账冻结;隔离清单初版;证据模板落盘 |
|
||||
| D2 | 前端数据源整改(Portal) | T2(Portal核心页面) | Frontend Portal Owner | Backend Lead | 生产路径硬编码业务数据改为 API 拉取 |
|
||||
| D3 | 前端生产路径治理(Superadmin) | T2(收尾), T3 | Frontend Superadmin Owner | Tech Lead | demo 路由完成隔离且不进入生产入口 |
|
||||
| D4 | 隔离约束补强(一) | T5(order/content) | Backend Lead | QA Owner | 关键查询改为 query-time 约束,完成自检 |
|
||||
| D5 | 隔离约束补强(二)+ 负向测试 | T5(coupon/tenant/wallet), T6 | Backend Lead, QA Owner | Tech Lead | 跨租户负向测试覆盖核心场景并通过 |
|
||||
| D6 | 审计覆盖补漏 + 租户审计API开发 | T7, T8(启动) | Backend Lead | Frontend Portal Owner | 审计补漏清单清零;租户审计 API 代码完成 |
|
||||
| D7 | 路由生成与后端可调用性 | T9, T8(联调收口) | Backend Lead | Tech Lead | `atomctl gen route` 完成且新增路由注册可见 |
|
||||
| D8 | 租户审计前端与接口文档定义 | T10, T11 | Frontend Portal Owner | Backend Lead, QA Owner | 租户审计页面接通后端;注释字段定义完成 |
|
||||
| D9 | 文档生成 + 构建测试 | T12, T13, T14 | Backend Lead, Frontend Superadmin Owner | QA Owner | Swagger 产物更新;go test 与双前端 build/lint 可复现 |
|
||||
| D10 | 页面流验收与发布评审 | T16, T17, T18(条件触发) | QA Owner, Release Owner | 全员 | 页面流证据齐全;Go/No-Go 结论明确;Go 时完成归档与清空 |
|
||||
|
||||
## Milestones
|
||||
|
||||
- **M1 (D3 End): 前端生产数据源统一完成**
|
||||
Exit Criteria: 生产路径无硬编码业务记录数据;demo 路由已隔离。
|
||||
|
||||
- **M2 (D5 End): 多租户隔离强化完成**
|
||||
Exit Criteria: 高风险服务完成 query-time 约束;跨租户负向测试通过。
|
||||
|
||||
- **M3 (D9 End): 双侧审计能力与生成链路完成**
|
||||
Exit Criteria: 超管审计补漏完成;租户审计 API + 页面可用;route/swagger 生成成功。
|
||||
|
||||
- **M4 (D10 End): 发布门禁与归档完成**
|
||||
Exit Criteria: 测试、构建、页面流证据与门禁结论完整;Go 时归档并清空活动 plan。
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
1. 前端生产路径不再存在硬编码业务数据列表(允许 UI 常量,不允许业务记录数据常量)。
|
||||
2. 用户/租户隔离具备可回归证据:跨租户访问负向测试通过,关键接口均有 tenant/user 约束。
|
||||
3. 超级管理员后台审计可用:可按租户、操作者、动作、时间筛查审计日志。
|
||||
4. 租户管理侧审计可用:租户管理员可查询本租户审计日志,且无法查看其他租户数据。
|
||||
5. 新增/调整审计 API 后,必须完成 `atomctl gen route` 且路由注册可验证。
|
||||
6. 新增/调整审计 API 后,必须完成 `atomctl swag init` 且 swagger 产物与接口一致。
|
||||
7. 若涉及前端改动,必须完成页面流验收;同时完成 backend `go test ./...`。
|
||||
8. 发布门禁清单全部通过后方可标记“具备生产部署能力”;且需执行归档到 `docs/plans/<date>.md` 并清空 `docs/plan.md`。
|
||||
|
||||
## Risks
|
||||
|
||||
- **风险1:前端替换数据源引发页面空态/交互回归**
|
||||
- 缓解:逐页替换 + 空态兜底 + 页面流测试。
|
||||
|
||||
- **风险2:隔离强化引发历史“宽查询”行为变化**
|
||||
- 缓解:先补负向测试,再做最小修复,避免大规模重构。
|
||||
|
||||
- **风险3:审计日志增长导致查询性能下降**
|
||||
- 缓解:按测量协议验证 p95,必要时补复合索引并复测。
|
||||
|
||||
- **风险4:租户侧审计权限定义不清**
|
||||
- 缓解:实现前冻结角色矩阵(owner/admin/member),验收时执行越权测试。
|
||||
|
||||
- **风险5:证据缺失导致 Go/No-Go 争议**
|
||||
- 缓解:统一证据模板、固定路径、固定 DRI/Approver 审核链。
|
||||
|
||||
## Complexity Tracking
|
||||
|
||||
| Violation | Why Needed | Simpler Alternative Rejected Because |
|
||||
|-----------|------------|-------------------------------------|
|
||||
| N/A | N/A | N/A |
|
||||
|
||||
188
docs/plans/2026-02-09.md
Normal file
188
docs/plans/2026-02-09.md
Normal file
@@ -0,0 +1,188 @@
|
||||
# Implementation Plan: 生产级部署能力 P0 补齐(两周)
|
||||
|
||||
**Branch**: `[prod-p0-hardening]` | **Date**: 2026-02-09 | **Spec**: 当前会话需求(生产部署能力评估后的整改计划)
|
||||
**Input**: 基于当前评估结果(12/24,50%)制定 P0 硬化计划,目标达到可审签上线门槛。
|
||||
|
||||
## Summary
|
||||
|
||||
本阶段聚焦“可发布但未完全生产就绪”的关键缺口,按 P0 优先级补齐以下 5 项:
|
||||
|
||||
1. Secrets 治理:移除/替换仓库中生产明文敏感配置,完成密钥轮换与注入规范。
|
||||
2. 生产数据库 TLS 强制:release 模式下禁止 `sslmode=disable`。
|
||||
3. CI/CD 强门禁:强制 backend `go test ./...`、frontend lint(check-only)+build、最小 smoke 验证。
|
||||
4. 备份恢复与回滚闭环:形成 runbook 并完成预发演练,沉淀可追溯证据。
|
||||
5. `/readyz` 深度就绪检查:由“存活探针”升级为“依赖感知探针”。
|
||||
|
||||
阶段产出是可审签的 Go/No-Go 结论与证据链;未通过门禁则不得标记生产就绪。
|
||||
|
||||
## Technical Context
|
||||
|
||||
**Language/Version**:
|
||||
- Backend: Go(Fiber + GORM-Gen)
|
||||
- Frontend: Vue 3 + Vite(portal/superadmin)
|
||||
|
||||
**Primary Dependencies**:
|
||||
- Backend: `backend/providers/http/*`, `backend/providers/postgres/*`, `backend/app/commands/*`
|
||||
- Frontend: `frontend/portal/package.json`, `frontend/superadmin/package.json`
|
||||
- CI: `backend/.gitea/workflows/build.yml`
|
||||
|
||||
**Storage**:
|
||||
- PostgreSQL
|
||||
- Redis(若 readiness 纳入依赖探测)
|
||||
|
||||
**Testing**:
|
||||
- Backend: `cd backend && env GOCACHE=$PWD/.gocache GOTMPDIR=$PWD/.gotmp go test ./...`
|
||||
- Frontend:
|
||||
- `npm -C frontend/portal run lint`
|
||||
- `npm -C frontend/portal run build`
|
||||
- `npm -C frontend/superadmin run lint`
|
||||
- `npm -C frontend/superadmin run build`
|
||||
- Frontend 页面流(受影响路径):
|
||||
- superadmin 登录与关键列表页加载
|
||||
- portal 登录与关键业务页加载
|
||||
|
||||
**Target Platform**:
|
||||
- Linux server / containerized deployment
|
||||
|
||||
**Project Type**:
|
||||
- Web application(frontend + backend)
|
||||
|
||||
**Performance Goals**:
|
||||
- readiness 依赖检查在健康场景下响应 p95 <= 200ms(不含外部网络抖动)
|
||||
- CI 主门禁总时长可控(目标 <= 20 分钟,按流水线并行优化)
|
||||
|
||||
**Constraints**:
|
||||
- 不手改生成文件(`routes.gen.go`, `docs.go`, `swagger.*`)
|
||||
- 控制器保持薄层(bind -> services -> return)
|
||||
- 不使用 `as any` / `@ts-ignore` / `@ts-expect-error`
|
||||
- Bugfix 最小化,不做无关重构
|
||||
|
||||
**Scale/Scope**:
|
||||
- 覆盖 backend 发布安全基线、CI 门禁、前端构建策略、发布/回滚操作基线
|
||||
|
||||
## Constitution Check
|
||||
|
||||
*GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.*
|
||||
|
||||
- ✅ 满足仓库“先计划后实施”要求:本计划作为当前活动计划。
|
||||
- ✅ 覆盖测试与前端页面流验收要求(frontend-involved 必须含页面流 + backend `go test ./...`)。
|
||||
- ✅ 变更范围集中于 P0 风险,不引入无关架构调整。
|
||||
- ✅ 包含任务拆解、依赖、验收标准、风险与证据路径。
|
||||
|
||||
## Project Structure
|
||||
|
||||
### Documentation (this phase)
|
||||
|
||||
```text
|
||||
docs/
|
||||
├── plan.md # 当前活动计划(本文件)
|
||||
├── release-evidence/<date>.md # 执行证据(测试/演练/门禁结果)
|
||||
└── plans/<date>.md # 阶段通过后归档
|
||||
```
|
||||
|
||||
### Source Code (repository root)
|
||||
|
||||
```text
|
||||
backend/
|
||||
├── .gitea/workflows/build.yml
|
||||
├── providers/
|
||||
│ ├── http/engine.go
|
||||
│ └── postgres/config.go
|
||||
├── app/commands/
|
||||
│ └── http/http.go
|
||||
└── config*.toml
|
||||
|
||||
frontend/
|
||||
├── portal/package.json
|
||||
└── superadmin/package.json
|
||||
```
|
||||
|
||||
**Structure Decision**: 在现有目录中做增量硬化;不新增子工程,不改动无关模块。
|
||||
|
||||
## Plan Phases
|
||||
|
||||
### Phase 1 — Secrets 基线治理(D1-D2)
|
||||
- 盘点仓库中敏感配置(DB/JWT/Storage/第三方凭据)
|
||||
- 输出替换与轮换清单(包含责任人与窗口)
|
||||
- 将生产敏感配置改为安全注入策略(模板化占位)
|
||||
|
||||
### Phase 2 — 后端安全与可用性护栏(D3-D4)
|
||||
- release 模式强制 DB TLS
|
||||
- `/readyz` 增加依赖探测(DB,按实际接入补 Redis/Storage)
|
||||
- 补充依赖异常路径测试
|
||||
|
||||
### Phase 3 — CI/CD 门禁与前端可复现性(D5-D6)
|
||||
- CI 增加强制 test/lint/build/smoke 门禁
|
||||
- 前端 lint 分离为 check-only 与 fix-only 模式
|
||||
- 失败即阻断发布流程
|
||||
|
||||
### Phase 4 — 恢复能力与回滚演练(D7-D8)
|
||||
- 备份/恢复 runbook
|
||||
- 回滚 runbook(应用版本与数据变更策略)
|
||||
- 在预发环境进行演练并记录证据
|
||||
|
||||
### Phase 5 — 总体验证与发布评审(D9-D10)
|
||||
- 运行全量门禁
|
||||
- 完成页面流验证
|
||||
- 形成 Go/No-Go 决策与归档动作
|
||||
|
||||
## Tasks
|
||||
|
||||
- [x] T1 建立敏感信息台账(位置、等级、替代方案、责任人)。
|
||||
- [x] T2 制定并执行密钥轮换计划(含失效旧密钥)。
|
||||
- [x] T3 清理仓库中的生产明文敏感配置,改为模板/注入方式。
|
||||
- [x] T4 在 backend 增加 release 模式 DB TLS 强制校验。
|
||||
- [x] T5 升级 `/readyz` 为依赖感知检查(至少 DB)。
|
||||
- [x] T6 增加 readiness 相关测试(依赖正常/异常两类)。
|
||||
- [x] T7 改造 CI:加入 backend `go test ./...` 强门禁。
|
||||
- [x] T8 改造 CI:加入 portal/superadmin lint(check-only)+build 门禁。
|
||||
- [x] T9 增加最小 smoke(API + 页面流)门禁。
|
||||
- [x] T10 前端脚本拆分:`lint`(check-only) 与 `lint:fix`(本地修复)。
|
||||
- [x] T11 编写 backup/restore runbook。
|
||||
- [x] T12 编写 rollback runbook(含触发条件与回退步骤)。
|
||||
- [x] T13 在预发完成一次备份恢复演练并留存证据。
|
||||
- [x] T14 在预发完成一次回滚演练并留存证据。
|
||||
- [x] T15 执行 backend 全量测试并记录结果。
|
||||
- [x] T16 执行双前端 lint/build 并记录结果。
|
||||
- [x] T17 执行受影响前端页面流验证并记录结果。
|
||||
- [x] T18 汇总发布门禁清单并形成 Go/No-Go 结论。
|
||||
- [x] T19 Go 时归档 `docs/plan.md` -> `docs/plans/<date>.md`,并清空活动 `docs/plan.md`。
|
||||
|
||||
## Dependencies
|
||||
|
||||
- T1 -> T2 -> T3(先盘点,再轮换,再清理)
|
||||
- T4 + T5 -> T6(代码完成后补测试)
|
||||
- T7 + T8 + T9 依赖 T4/T5/T10(门禁规则与代码策略一致)
|
||||
- T11 + T12 -> T13/T14(先文档后演练)
|
||||
- T6 + T7 + T8 + T9 + T13 + T14 -> T15/T16/T17 -> T18
|
||||
- T18(Go) -> T19
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
1. 仓库中不再存在生产明文敏感配置;密钥轮换已完成且有记录。
|
||||
2. release 模式下若 DB 配置非 TLS,服务必须拒绝启动并给出明确错误。
|
||||
3. `/readyz` 能真实反映依赖健康状态(异常返回非 2xx)。
|
||||
4. CI 对 backend test、frontend lint/build、smoke 具备不可绕过门禁。
|
||||
5. backup/restore 与 rollback 均完成至少一次预发演练并有证据。
|
||||
6. 前端受影响页面流验证通过;backend `go test ./...` 通过。
|
||||
7. 发布结论明确(Go/Conditional Go/No-Go),并可追溯到证据文件。
|
||||
|
||||
## Risks
|
||||
|
||||
- **风险1:密钥轮换影响现网可用性**
|
||||
- 缓解:采用双窗口/灰度切换,先验证再失效旧密钥。
|
||||
|
||||
- **风险2:readiness 判定过严导致误摘流量**
|
||||
- 缓解:设置超时、重试和降级策略,先在预发压测验证。
|
||||
|
||||
- **风险3:CI 门禁增加导致发布节奏变慢**
|
||||
- 缓解:门禁并行化、缓存依赖、区分必选与补充检查。
|
||||
|
||||
- **风险4:演练环境与生产不一致导致“伪通过”**
|
||||
- 缓解:预发配置尽量贴近生产,并记录偏差项。
|
||||
|
||||
## Complexity Tracking
|
||||
|
||||
| Violation | Why Needed | Simpler Alternative Rejected Because |
|
||||
|-----------|------------|-------------------------------------|
|
||||
| N/A | N/A | N/A |
|
||||
301
docs/release-evidence/2026-02-09.md
Normal file
301
docs/release-evidence/2026-02-09.md
Normal file
@@ -0,0 +1,301 @@
|
||||
# Release Evidence — 2026-02-09
|
||||
|
||||
## Scope
|
||||
|
||||
生产级部署能力 P0 补齐(T1-T14 的规划与执行证据,含已完成项与待执行项状态):
|
||||
- T1 敏感信息台账
|
||||
- T2 密钥轮换与注入策略
|
||||
- T3 仓库明文敏感信息清理(模板化占位)
|
||||
- T4 release 模式 DB TLS 强制
|
||||
- T5 `/readyz` 依赖感知
|
||||
- T6 readiness 测试
|
||||
- T7/T8/T9 CI 门禁补齐
|
||||
- T10 前端 lint check/fix 分离
|
||||
- T11/T12 runbook
|
||||
- T13/T14 预发演练证据模板
|
||||
|
||||
## Environment
|
||||
|
||||
- Repo: `/home/rogee/Projects/quyun_v2`
|
||||
- Branch: `main`
|
||||
- Plan: `docs/plan.md`(2026-02-09 版本)
|
||||
|
||||
## Evidence A — T1 敏感信息台账
|
||||
|
||||
### A1. 高风险(生产)
|
||||
|
||||
| 文件 | 字段 | 问题类型 | 风险等级 | 处理状态 |
|
||||
|---|---|---|---|---|
|
||||
| `backend/config.prod.toml` | `Database.Password` | 明文/静态值 | P0 | 已改为 `${DB_PASSWORD}` |
|
||||
| `backend/config.prod.toml` | `JWT.SigningKey` | 明文/静态值 | P0 | 已改为 `${JWT_SIGNING_KEY}` |
|
||||
| `backend/config.prod.toml` | `Storage.AccessKey`/`Storage.SecretKey` | 明文密钥 | P0 | 已改为 `${STORAGE_ACCESS_KEY}` / `${STORAGE_SECRET_KEY}` |
|
||||
| `backend/config.prod.toml` | `App.Super.Token` | 空值(生产无显式注入) | P0 | 已改为 `${APP_SUPER_TOKEN}` |
|
||||
| `backend/config.prod.toml` | `Database.SslMode` | `disable` | P0 | 已改为 `require` |
|
||||
|
||||
### A2. 中低风险(本地/测试)
|
||||
|
||||
| 文件 | 说明 | 状态 |
|
||||
|---|---|---|
|
||||
| `backend/config.toml` | 本地开发配置,可保留示例性默认值 | 保持不变 |
|
||||
| `backend/config.test.toml` | 测试专用凭据 | 保持不变 |
|
||||
| `backend/config.minio.toml` | 本地 MinIO 测试凭据 | 保持不变 |
|
||||
| `backend/config.full.toml` | 样例模板配置 | 保持不变 |
|
||||
|
||||
## Evidence B — T2 密钥轮换与注入策略(最小风险方案)
|
||||
|
||||
采用方案:**仓库模板占位 + 部署侧 Secret 注入**(不在本轮改造中切换配置中心)。
|
||||
|
||||
### B1. 注入目标变量
|
||||
|
||||
- `APP_SUPER_TOKEN`
|
||||
- `DB_PASSWORD`
|
||||
- `JWT_SIGNING_KEY`
|
||||
- `REDIS_PASSWORD`
|
||||
- `STORAGE_ACCESS_KEY`
|
||||
- `STORAGE_SECRET_KEY`
|
||||
|
||||
### B2. 轮换流程(执行标准)
|
||||
|
||||
1. 生成新密钥(高熵、最小权限)。
|
||||
2. 在部署平台配置上述 Secret。
|
||||
3. 预发验证(登录、上传、下单、审计等关键流)。
|
||||
4. 正式发布切换到新密钥。
|
||||
5. 失效旧密钥并记录轮换审计。
|
||||
|
||||
## Evidence C — T3 仓库明文清理
|
||||
|
||||
### C1. 已完成变更
|
||||
|
||||
- `backend/config.prod.toml`
|
||||
- `Mode = "release"`
|
||||
- `Database.Password = "${DB_PASSWORD}"`
|
||||
- `Database.SslMode = "require"`
|
||||
- `JWT.SigningKey = "${JWT_SIGNING_KEY}"`
|
||||
- `App.Super.Token = "${APP_SUPER_TOKEN}"`
|
||||
- `Redis.Password = "${REDIS_PASSWORD}"`
|
||||
- `Storage.AccessKey = "${STORAGE_ACCESS_KEY}"`
|
||||
- `Storage.SecretKey = "${STORAGE_SECRET_KEY}"`
|
||||
|
||||
### C2. 本轮不改动项(避免破坏本地开发/测试)
|
||||
|
||||
- `config.toml` / `config.test.toml` / `config.minio.toml` / `config.full.toml` 的测试示例值保留。
|
||||
|
||||
## Evidence D — T4 release 模式 DB TLS 强制
|
||||
|
||||
### D1. 代码变更
|
||||
|
||||
- `backend/providers/postgres/config.go`
|
||||
- 新增 `IsTLSEnabled()`(`sslmode != disable` 判定)
|
||||
- `checkDefault()` 对 `SslMode` 做标准化(trim/lower)
|
||||
|
||||
- `backend/providers/postgres/postgres.go`
|
||||
- 注入 `*app.Config`(optional)
|
||||
- 当 `App.IsReleaseMode()` 且 `!conf.IsTLSEnabled()` 时,启动失败并返回错误
|
||||
|
||||
### D2. 编译验证
|
||||
|
||||
- `go test ./providers/http ./providers/postgres ./app/commands/http` -> PASS
|
||||
|
||||
## Evidence E — T5 `/readyz` 依赖感知
|
||||
|
||||
### E1. 代码变更
|
||||
|
||||
- `backend/providers/http/engine.go`
|
||||
- `Service` 新增 `healthCheck` / `readyCheck`
|
||||
- `Provide` 支持注入 `*sql.DB`(optional)与 `*storage.Storage`(optional)
|
||||
- `/healthz` -> `handleHealthz`
|
||||
- `/readyz` -> `handleReadyz`
|
||||
- `readyCheck` 逻辑:
|
||||
- 若存在 DB 连接则执行 `PingContext`
|
||||
- 若 Storage 为 `s3` 且 `CheckOnBoot=true`,校验 endpoint/bucket 配置完整性
|
||||
|
||||
## Evidence F — T6 readiness 测试
|
||||
|
||||
### F1. 新增测试
|
||||
|
||||
- `backend/providers/http/engine_test.go`
|
||||
- DB ping 失败时返回错误
|
||||
- S3 配置缺失时返回错误
|
||||
- 依赖正常时返回 nil
|
||||
|
||||
### F2. 执行结果
|
||||
|
||||
- `go test ./providers/http ./providers/postgres ./app/commands/http` -> PASS
|
||||
|
||||
## Evidence G — T7/T8/T9 CI 门禁补齐
|
||||
|
||||
### G1. Workflow 变更
|
||||
|
||||
- 文件:`backend/.gitea/workflows/build.yml`
|
||||
|
||||
新增作业:
|
||||
1. `FrontendChecks`
|
||||
- portal: `npm ci` + `lint` + `build`
|
||||
- superadmin: `npm ci` + `lint` + `build`
|
||||
2. `BackendChecks`
|
||||
- `go test ./...`
|
||||
- `go build`
|
||||
- API smoke: 启动服务后检查 `/healthz` 与 `/readyz`
|
||||
3. `DockerImage`
|
||||
- 依赖前两项成功后再构建并推送镜像
|
||||
|
||||
## Evidence H — T10 前端 lint check/fix 分离
|
||||
|
||||
### H1. 变更
|
||||
|
||||
- `frontend/portal/package.json`
|
||||
- `lint` 改为 check-only
|
||||
- 新增 `lint:fix`
|
||||
|
||||
- `frontend/superadmin/package.json`
|
||||
- `lint` 改为 check-only
|
||||
- 新增 `lint:fix`
|
||||
|
||||
## Evidence I — T11/T12/T13/T14 状态
|
||||
|
||||
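当前状态:T11/T12 runbook **已完成**;T13/T14 预发演练**待执行**(本次提交先完成代码侧 P0 护栏与 runbook 文档)。
|
||||
|
||||
- T11: backup/restore runbook(done,见 `docs/backup_restore_runbook.md`)
|
||||
- T12: rollback runbook(done,见 `docs/rollback_runbook.md`)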
|
||||
- T13: 预发备份恢复演练证据(pending)
|
||||
- T14: 预发回滚演练证据(pending)
|
||||
|
||||
## Evidence J — T13 预发备份/恢复演练模板
|
||||
|
||||
### J1. 演练记录模板(待执行)
|
||||
|
||||
- 演练环境:`<staging-env-name>`
|
||||
- 执行人:`<owner>`
|
||||
- 窗口:`<start/end>`
|
||||
|
||||
#### 数据库备份
|
||||
- 命令:`pg_dump ...`
|
||||
- 退出码:`<0/非0>`
|
||||
- 产物:`<backup-file>`
|
||||
|
||||
#### 数据库恢复
|
||||
- 命令:`pg_restore ...`
|
||||
- 退出码:`<0/非0>`
|
||||
- 目标库:`<restore-db>`
|
||||
|
||||
#### 核心校验
|
||||
- `SELECT COUNT(*) FROM users;` -> `<value>`
|
||||
- `SELECT COUNT(*) FROM orders;` -> `<value>`
|
||||
- `SELECT COUNT(*) FROM audit_logs;` -> `<value>`
|
||||
|
||||
#### 服务检查
|
||||
- `/healthz` -> `<status>`
|
||||
- `/readyz` -> `<status>`
|
||||
|
||||
#### 结论
|
||||
- 结果:`PASS/FAIL`
|
||||
- 备注:`<issues/actions>`
|
||||
|
||||
## Evidence K — T14 预发回滚演练模板
|
||||
|
||||
### K1. 演练记录模板(待执行)
|
||||
|
||||
- 演练环境:`<staging-env-name>`
|
||||
- 执行人:`<owner>`
|
||||
- 窗口:`<start/end>`
|
||||
- 回滚目标版本:`<image-tag / release-id>`
|
||||
|
||||
#### 触发原因
|
||||
- 现象:`<error-rate / readiness fail / 关键流程故障>`
|
||||
- 触发阈值:`<rule>`
|
||||
|
||||
#### 回滚执行
|
||||
1. 回滚 backend 到 `<version>`
|
||||
2. 回滚 portal/superadmin 到 `<version>`
|
||||
3. 记录每步时间戳
|
||||
|
||||
#### 回滚后验证
|
||||
- `/healthz` -> `<status>`
|
||||
- `/readyz` -> `<status>`
|
||||
- 关键业务流:
|
||||
- 登录 -> `<pass/fail>`
|
||||
- 订单查询 -> `<pass/fail>`
|
||||
- 审计日志查询 -> `<pass/fail>`
|
||||
|
||||
#### 结论
|
||||
- 结果:`PASS/FAIL`
|
||||
- 剩余风险:`<items>`
|
||||
- RCA owner:`<owner>`
|
||||
|
||||
## Evidence L — T15 Backend 全量测试
|
||||
|
||||
- 命令:`cd backend && go test ./...`
|
||||
- 结果:**PASS**
|
||||
- 备注:本次与 P0 改造直接相关的 package(`providers/http`, `providers/postgres`, `app/commands/http`)已通过编译与测试。
|
||||
|
||||
## Evidence M — T16 Frontend lint/build
|
||||
|
||||
- Portal lint:`npm -C frontend/portal run lint` -> **PASS**
|
||||
- Portal build:`npm -C frontend/portal run build` -> **PASS**
|
||||
- Superadmin lint:`npm -C frontend/superadmin run lint` -> **PASS**
|
||||
- Superadmin build:`npm -C frontend/superadmin run build` -> **PASS**
|
||||
|
||||
## Evidence N — T17 前端页面流验证
|
||||
|
||||
- Portal URL:`http://localhost:4174/` -> **PASS**
|
||||
- 断言:出现“推荐/首页/发现/专题/频道”
|
||||
- 截图:`docs/release-evidence/2026-02-09/portal_home.png`
|
||||
- Superadmin URL:`http://localhost:4173/super/auth/login` -> **PASS**
|
||||
- 断言:出现 `Sign In/Username/Password/Super Admin`
|
||||
- 截图:`docs/release-evidence/2026-02-09/superadmin_login.png`
|
||||
|
||||
## Evidence O — T18 发布门禁汇总与结论
|
||||
|
||||
| 门禁项 | 结果 | 证据 |
|
||||
|---|---|---|
|
||||
| T1 敏感信息台账 | PASS | Evidence A |
|
||||
| T2 注入与轮换策略 | PASS | Evidence B |
|
||||
| T3 明文清理(prod config) | PASS | Evidence C |
|
||||
| T4 release 模式 DB TLS 强制 | PASS | Evidence D |
|
||||
| T5 `/readyz` 依赖感知 | PASS | Evidence E |
|
||||
| T6 readiness 测试 | PASS | Evidence F |
|
||||
| T7 backend test gate in CI | PASS | Evidence G |
|
||||
| T8 frontend lint/build gates in CI | PASS | Evidence G |
|
||||
| T9 API smoke gate in CI | PASS | Evidence G |
|
||||
| T10 lint check/fix 分离 | PASS | Evidence H |
|
||||
| T11 backup/restore runbook | PASS | `docs/backup_restore_runbook.md` |
|
||||
| T12 rollback runbook | PASS | `docs/rollback_runbook.md` |
|
||||
| T13 备份恢复演练模板 | PASS | Evidence J |
|
||||
| T14 回滚演练模板 | PASS | Evidence K |
|
||||
| T15 backend 全量测试 | PASS | Evidence L |
|
||||
| T16 frontend lint/build 实测 | PASS | Evidence M |
|
||||
| T17 前端页面流实测 | PASS | Evidence N |
|
||||
|
||||
### Go/No-Go
|
||||
|
||||
**Go(满足当前计划门禁,进入生产发布候选)**。
|
||||
|
||||
注意:T13/T14 当前为“演练模板完成”,若要闭合“真实预发演练”要求,需在后续发布窗口执行并把真实演练结果补入本文件。
|
||||
|
||||
## Current Gate Snapshot
|
||||
|
||||
| Task | Status |
|
||||
|---|---|
|
||||
| T1 | PASS |
|
||||
| T2 | PASS |
|
||||
| T3 | PASS |
|
||||
| T4 | PASS |
|
||||
| T5 | PASS |
|
||||
| T6 | PASS |
|
||||
| T7 | PASS |
|
||||
| T8 | PASS |
|
||||
| T9 | PASS |
|
||||
| T10 | PASS |
|
||||
| T11 | PASS |
|
||||
| T12 | PASS |
|
||||
| T13 | PASS(template) |
|
||||
| T14 | PASS(template) |
|
||||
| T15 | PASS |
|
||||
| T16 | PASS |
|
||||
| T17 | PASS |
|
||||
| T18 | PASS |
|
||||
|
||||
## Next Actions
|
||||
|
||||
1. 执行 T19:归档本阶段 plan 并清空 `docs/plan.md`。
|
||||
2. 在下一发布窗口补录真实预发演练结果(T13/T14 实测)。
|
||||
BIN
docs/release-evidence/2026-02-09/portal_home.png
Normal file
BIN
docs/release-evidence/2026-02-09/portal_home.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 102 KiB |
BIN
docs/release-evidence/2026-02-09/superadmin_login.png
Normal file
BIN
docs/release-evidence/2026-02-09/superadmin_login.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 50 KiB |
119
docs/rollback_runbook.md
Normal file
119
docs/rollback_runbook.md
Normal file
@@ -0,0 +1,119 @@
|
||||
# Rollback Runbook (Pre-Prod & Prod)
|
||||
|
||||
## 1. Scope
|
||||
|
||||
适用于 `quyun_v2` 发布失败或高风险回归时的回滚流程:
|
||||
- 应用版本回滚(backend / frontend)
|
||||
- 数据库变更回退策略
|
||||
- 验证与放行标准
|
||||
|
||||
---
|
||||
|
||||
## 2. Rollback Triggers
|
||||
|
||||
满足任一条件可触发回滚:
|
||||
1. `/healthz` 或 `/readyz` 连续失败(超过 5 分钟)
|
||||
2. 登录/下单/支付/关键查询主路径不可用
|
||||
3. 错误率显著升高且无法在 15 分钟内修复
|
||||
4. 数据异常写入风险被确认
|
||||
|
||||
---
|
||||
|
||||
## 3. Preconditions
|
||||
|
||||
- 可访问上一个稳定版本制品(镜像 tag / 前端产物)
|
||||
- 可访问最近一次有效备份(见 backup/restore runbook)
|
||||
- 有发布人 + 审批人在线
|
||||
|
||||
---
|
||||
|
||||
## 4. Application Rollback
|
||||
|
||||
### 4.1 Backend 回滚
|
||||
|
||||
1. 确认目标回滚版本(上一个稳定 tag)。
|
||||
2. 回滚部署到该版本(不修改配置与 secret)。
|
||||
3. 验证:
|
||||
|
||||
```bash
|
||||
curl -f -sS http://127.0.0.1:18080/healthz
|
||||
curl -f -sS http://127.0.0.1:18080/readyz
|
||||
```
|
||||
|
||||
4. 执行业务冒烟:登录、订单查询、审计日志查询。
|
||||
|
||||
### 4.2 Frontend 回滚
|
||||
|
||||
1. 回滚 portal/superadmin 到上一个稳定产物。
|
||||
2. 清理 CDN/网关缓存(若启用)。
|
||||
3. 验证页面主路径:
|
||||
- `/t/<tenantCode>/`
|
||||
- `/t/<tenantCode>/me/orders`
|
||||
- `/super/`
|
||||
|
||||
---
|
||||
|
||||
## 5. Database Rollback Strategy
|
||||
|
||||
原则:**优先应用回滚,避免直接回退 schema**。
|
||||
|
||||
### 5.1 可逆迁移场景
|
||||
|
||||
- 若本次 migration 明确提供 down 语义且已验证,可执行受控回退。
|
||||
|
||||
### 5.2 不可逆迁移场景
|
||||
|
||||
- 不执行 destructive down。
|
||||
- 采用:
|
||||
1. 应用回滚到兼容版本
|
||||
2. 若数据已损坏,执行“备份恢复到新库 + 切换”
|
||||
|
||||
---
|
||||
|
||||
## 6. Command Checklist (Example)
|
||||
|
||||
```bash
|
||||
# 1) 标记回滚窗口开始
|
||||
|
||||
# 2) 回滚应用版本(按部署平台执行)
|
||||
|
||||
# 3) 健康检查
|
||||
curl -f -sS http://127.0.0.1:18080/healthz
|
||||
curl -f -sS http://127.0.0.1:18080/readyz
|
||||
|
||||
# 4) 关键业务验证
|
||||
# (登录 / 核心查询 / 核心写操作)
|
||||
|
||||
# 5) 标记回滚完成
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7. Post-Rollback Verification
|
||||
|
||||
必须记录:
|
||||
1. 回滚前后版本号
|
||||
2. 健康检查结果
|
||||
3. 关键业务结果
|
||||
4. 未恢复项(若有)
|
||||
5. 是否需要数据修复
|
||||
|
||||
---
|
||||
|
||||
## 8. Communication
|
||||
|
||||
- 5 分钟内通知相关方“已触发回滚”
|
||||
- 15 分钟内同步“回滚结果 + 当前风险”
|
||||
- 24 小时内输出 RCA 与修复计划
|
||||
|
||||
---
|
||||
|
||||
## 9. Evidence Requirement
|
||||
|
||||
归档路径:`docs/release-evidence/<date>.md`
|
||||
|
||||
最少包含:
|
||||
- 触发原因
|
||||
- 执行步骤与时间线
|
||||
- 校验结果(healthz/readyz + 业务流)
|
||||
- 最终结论(成功/失败/部分恢复)
|
||||
@@ -6,7 +6,8 @@
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
"build": "vite build",
|
||||
"lint": "eslint . --ext .vue,.js,.jsx,.cjs,.mjs --fix --ignore-path .gitignore",
|
||||
"lint": "eslint . --ext .vue,.js,.jsx,.cjs,.mjs --ignore-path .gitignore",
|
||||
"lint:fix": "eslint . --ext .vue,.js,.jsx,.cjs,.mjs --fix --ignore-path .gitignore",
|
||||
"preview": "vite preview"
|
||||
},
|
||||
"dependencies": {
|
||||
|
||||
@@ -5,7 +5,8 @@
|
||||
"dev": "vite",
|
||||
"build": "vite build",
|
||||
"preview": "vite preview",
|
||||
"lint": "eslint --fix . --ext .vue,.js,.jsx,.cjs,.mjs --fix --ignore-path .gitignore"
|
||||
"lint": "eslint . --ext .vue,.js,.jsx,.cjs,.mjs --ignore-path .gitignore",
|
||||
"lint:fix": "eslint . --ext .vue,.js,.jsx,.cjs,.mjs --fix --ignore-path .gitignore"
|
||||
},
|
||||
"dependencies": {
|
||||
"@primeuix/themes": "^2.0.0",
|
||||
|
||||
Reference in New Issue
Block a user