From 1ddda89499512042c5b6840324efbd5c71d20fae Mon Sep 17 00:00:00 2001 From: Rogee Date: Mon, 17 Nov 2025 15:39:44 +0800 Subject: [PATCH] update --- AGENTS.md | 3 +- README.md | 14 +- configs/config.example.toml | 28 +- docs/architecture.md | 43 ++ internal/config/modules.go | 1 + internal/config/validation.go | 6 +- internal/hubmodule/README.md | 2 +- .../composer/hooks.go} | 294 +++++----- internal/hubmodule/composer/hooks_test.go | 43 ++ internal/hubmodule/docker/hooks.go | 105 ++++ internal/hubmodule/docker/hooks_test.go | 31 ++ internal/hubmodule/golang/hooks.go | 27 + internal/hubmodule/golang/hooks_test.go | 19 + internal/hubmodule/golang/module.go | 28 + internal/hubmodule/legacy/legacy_module.go | 2 +- internal/hubmodule/npm/hooks.go | 26 + internal/hubmodule/npm/hooks_test.go | 22 + internal/hubmodule/pypi/hooks.go | 215 ++++++++ internal/hubmodule/pypi/hooks_test.go | 42 ++ .../hubmodule/template/hook_example_test.go | 22 + internal/hubmodule/template/module.go | 6 +- internal/hubmodule/template/module_test.go | 73 +++ internal/logging/fields.go | 3 +- internal/proxy/docker_path_test.go | 68 --- internal/proxy/forwarder.go | 39 +- internal/proxy/forwarder_test.go | 16 + internal/proxy/handler.go | 508 ++++++------------ internal/proxy/handler_hook_test.go | 80 +++ internal/proxy/hooks/hooks.go | 38 +- internal/proxy/hooks/registry.go | 68 +++ internal/proxy/hooks/registry_test.go | 45 ++ internal/proxy/pypi_rewrite.go | 126 ----- internal/server/routes/modules.go | 58 +- internal/server/routes/modules_test.go | 67 +++ main.go | 9 + main_test.go | 29 + .../checklists/requirements.md | 34 ++ .../contracts/README.md | 16 + specs/006-module-hook-refactor/data-model.md | 37 ++ specs/006-module-hook-refactor/plan.md | 68 +++ specs/006-module-hook-refactor/quickstart.md | 44 ++ specs/006-module-hook-refactor/research.md | 23 + specs/006-module-hook-refactor/spec.md | 99 ++++ specs/006-module-hook-refactor/tasks.md | 100 ++++ tests/integration/hook_logging_test.go | 297 ++++++++++ tests/integration/module_diagnostics_test.go | 12 + 46 files changed, 2185 insertions(+), 751 deletions(-) create mode 100644 docs/architecture.md rename internal/{proxy/composer_rewrite.go => hubmodule/composer/hooks.go} (54%) create mode 100644 internal/hubmodule/composer/hooks_test.go create mode 100644 internal/hubmodule/docker/hooks.go create mode 100644 internal/hubmodule/docker/hooks_test.go create mode 100644 internal/hubmodule/golang/hooks.go create mode 100644 internal/hubmodule/golang/hooks_test.go create mode 100644 internal/hubmodule/golang/module.go create mode 100644 internal/hubmodule/npm/hooks.go create mode 100644 internal/hubmodule/npm/hooks_test.go create mode 100644 internal/hubmodule/pypi/hooks.go create mode 100644 internal/hubmodule/pypi/hooks_test.go create mode 100644 internal/hubmodule/template/hook_example_test.go create mode 100644 internal/hubmodule/template/module_test.go delete mode 100644 internal/proxy/docker_path_test.go create mode 100644 internal/proxy/handler_hook_test.go create mode 100644 internal/proxy/hooks/registry.go create mode 100644 internal/proxy/hooks/registry_test.go delete mode 100644 internal/proxy/pypi_rewrite.go create mode 100644 internal/server/routes/modules_test.go create mode 100644 specs/006-module-hook-refactor/checklists/requirements.md create mode 100644 specs/006-module-hook-refactor/contracts/README.md create mode 100644 specs/006-module-hook-refactor/data-model.md create mode 100644 specs/006-module-hook-refactor/plan.md create mode 100644 specs/006-module-hook-refactor/quickstart.md create mode 100644 specs/006-module-hook-refactor/research.md create mode 100644 specs/006-module-hook-refactor/spec.md create mode 100644 specs/006-module-hook-refactor/tasks.md create mode 100644 tests/integration/hook_logging_test.go diff --git a/AGENTS.md b/AGENTS.md index 6c5147d..aeaca73 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -9,6 +9,7 @@ Auto-generated from all feature plans. Last updated: 2025-11-13 - 仍使用本地 `StoragePath//` 目录缓存正文,并依赖 HEAD 对动态标签再验证 (003-hub-auth-fields) - 本地文件系统缓存目录 `StoragePath//`,模块需直接复用原始路径布局 (004-modular-proxy-cache) - 本地文件系统缓存目录 `StoragePath//`(按模块定义的布局) (005-proxy-module-delegation) +- 本地文件系统缓存目录 `StoragePath//`(由模块 Hook 定义布局) (006-module-hook-refactor) - Go 1.25+ (静态链接,单二进制交付) + Fiber v3(HTTP 服务)、Viper(配置)、Logrus + Lumberjack(结构化日志 [EXTRACTED FROM ALL PLAN.MD FILES] 滚动)、标准库 `net/http`/`io` (001-config-bootstrap) @@ -28,9 +29,9 @@ tests/ Go 1.25+ (静态链接,单二进制交付): Follow standard conventions ## Recent Changes +- 006-module-hook-refactor: Added Go 1.25+ (静态链接,单二进制交付) + Fiber v3(HTTP 服务)、Viper(配置)、Logrus + Lumberjack(结构化日志 & 滚动)、标准库 `net/http`/`io` - 005-proxy-module-delegation: Added Go 1.25+ (静态链接,单二进制交付) + Fiber v3(HTTP 服务)、Viper(配置)、Logrus + Lumberjack(结构化日志 & 滚动)、标准库 `net/http`/`io` - 004-modular-proxy-cache: Added Go 1.25+ (静态链接,单二进制交付) + Fiber v3(HTTP 服务)、Viper(配置)、Logrus + Lumberjack(结构化日志 & 滚动)、标准库 `net/http`/`io` -- 003-hub-auth-fields: Added Go 1.25+(静态链接单二进制) + Fiber v3(HTTP 服务)、Viper(配置加载/校验)、Logrus + Lumberjack(结构化日志 & 滚动)、标准库 `net/http`/`io`(代理回源) diff --git a/README.md b/README.md index 2e3a52a..016a679 100644 --- a/README.md +++ b/README.md @@ -48,9 +48,19 @@ Password = "s3cr3t" ## 模块化代理与示例 -- `configs/docker.sample.toml`、`configs/npm.sample.toml` 展示了 Docker/NPM 的最小配置,包含新的 `Module` 字段,复制后即可按需调整。 +- `configs/config.example.toml` 展示了多个 Hub 的组合:Docker Hub(省略 `Module`,自动使用 `Type` 同名 Hook)、Composer Hub(显式指定 `Module = "composer"`)以及 legacy 兜底 Hub,可直接复制修改。 - 运行 `./scripts/demo-proxy.sh docker`(或 `npm`)即可加载示例配置并启动代理,日志中会附带 `module_key` 字段,便于确认命中的是 `legacy` 还是自定义模块。 -- 若需自定义模块,可复制 `internal/hubmodule/template/`、在 `init()` 中调用 `hubmodule.MustRegister` 描述 metadata,并通过 `proxy.RegisterModuleHandler` 注入模块专属的 `ProxyHandler`,最后运行 `make modules-test` 自检。 +- Hook 开发流程: + 1. 复制 `internal/hubmodule/template/` 至 `internal/hubmodule//`,补全 `module.go` 与 `module_test.go`。 + 2. 在模块 `init()` 中调用 `hubmodule.MustRegister` 注册 metadata,并使用 `hooks.MustRegister` 注册 Hook(NormalizePath/ResolveUpstream/RewriteResponse 等)。 + 3. 为模块补充单元测试、`tests/integration/` 覆盖 miss→hit 流程,运行 `make modules-test`/`go test ./...`。 + 4. 更新配置:若 `[[Hub]].Module` 留空,将根据 `Type` 自动选择 Hook;也可显式设置 `Module = ""` 并通过 `Rollout` 控制 legacy/dual/modular。 + 5. 启动服务前,可通过 `curl -s /-/modules | jq '.hook_registry'` 确认 hook 注册情况;缺失时启动会直接失败,避免运行期回退到 legacy。 + +### 模块选择与 legacy +- `[[Hub]].Module` 为空时会自动回退到与 `Type` 同名的模块(若已注册),否则使用 `legacy` 兜底。 +- diagnostics `/-/modules` 将展示 `hook_status`,当模块仍使用 legacy 时会标记 `legacy-only`,便于排查。 +- legacy 模块仅提供最小兜底能力,迁移完成后应显式将 `Module` 设置为对应仓库的 Hook。 - 示例操作手册、常见问题参见 [`specs/003-hub-auth-fields/quickstart.md`](specs/003-hub-auth-fields/quickstart.md) 以及本特性的 [`quickstart.md`](specs/004-modular-proxy-cache/quickstart.md)。 ## CLI 标志 diff --git a/configs/config.example.toml b/configs/config.example.toml index 33cb660..8cb4093 100644 --- a/configs/config.example.toml +++ b/configs/config.example.toml @@ -13,13 +13,31 @@ InitialBackoff = "1s" UpstreamTimeout = "30s" [[Hub]] -Name = "docker" +Name = "docker-cache" Domain = "docker.hub.local" Upstream = "https://registry-1.docker.io" Proxy = "" -Type = "docker" # 必填:docker|npm|go -Module = "legacy" # 每个 Hub 使用的代理+缓存模块,默认为 legacy -Username = "" # 可选:若填写需与 Password 同时出现 +Type = "docker" # 省略 Module 时自动选择与 Type 同名的 Hook(此处为 docker) +# Module = "docker" # 如需明确指定,可取消注释 +Rollout = "modular" +Username = "" Password = "" -CacheTTL = 43200 # 可选: 覆盖全局缓存 TTL(秒) +CacheTTL = 43200 EnableHeadCheck = true + +[[Hub]] +Name = "composer-cache" +Domain = "composer.hub.local" +Upstream = "https://repo.packagist.org" +Type = "composer" +Module = "composer" # 显式绑定 composer Hook,启动时会验证 hook 是否已注册 +Rollout = "dual" # 可选:legacy-only/dual/modular +CacheTTL = 21600 + +[[Hub]] +Name = "legacy-fallback" +Domain = "legacy.hub.local" +Upstream = "https://registry.npmjs.org" +Type = "npm" +Module = "legacy" # 仍未迁移的 Hub 可显式指定 legacy,诊断会标记为 legacy-only +Rollout = "legacy-only" diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..ec23d73 --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,43 @@ +# any-hub Architecture (Mermaid) + +```mermaid +flowchart LR + subgraph Clients + user[Anonymous Clients] + end + + subgraph Server["Fiber App (internal/server)"] + routes["/- routes & Hub registry\ninternal/server/routes"] + forwarder["Forwarder (module dispatch)\ninternal/proxy/forwarder"] + handler["Proxy Handler (orchestration)\ninternal/proxy/handler"] + end + + subgraph Modules["Hub Modules + Hooks"] + hookreg["Hook Registry\ninternal/proxy/hooks"] + hubreg["Module Metadata Registry\ninternal/hubmodule"] + hooks["Module Hooks\n(docker/npm/pypi/composer/go)"] + end + + subgraph CacheAndUpstream["Cache + Upstream"] + store["Cache Store\ninternal/cache"] + upstream["External Upstreams\nregistry/npm/simple/etc"] + end + + user -->|HTTP| routes + routes -->|HubRoute| forwarder + forwarder -->|module_key| handler + handler -->|lookup| hubreg + handler -->|hook fetch| hookreg + handler --> hooks + handler -->|read/write| store + handler -->|HTTP| upstream + + hooks --> handler + store --> handler + upstream --> handler +``` + +- Requests flow from clients into Fiber routes, which use the Hub registry to select a `HubRoute`. +- Forwarder chooses the module handler based on `module_key`; the proxy handler orchestrates cache lookup/write and upstream streaming. +- Module-specific logic (path normalization, upstream resolution, rewrites, cache policy) lives in Hooks per module; hook registry enforces registration. +- Cache store manages local filesystem layout, while upstreams provide original artifacts/content. diff --git a/internal/config/modules.go b/internal/config/modules.go index a5c39d5..c4fb937 100644 --- a/internal/config/modules.go +++ b/internal/config/modules.go @@ -3,6 +3,7 @@ package config import ( _ "github.com/any-hub/any-hub/internal/hubmodule/composer" _ "github.com/any-hub/any-hub/internal/hubmodule/docker" + _ "github.com/any-hub/any-hub/internal/hubmodule/golang" _ "github.com/any-hub/any-hub/internal/hubmodule/legacy" _ "github.com/any-hub/any-hub/internal/hubmodule/npm" _ "github.com/any-hub/any-hub/internal/hubmodule/pypi" diff --git a/internal/config/validation.go b/internal/config/validation.go index 1266bb1..8dec819 100644 --- a/internal/config/validation.go +++ b/internal/config/validation.go @@ -79,7 +79,11 @@ func (c *Config) Validate() error { moduleKey := strings.ToLower(strings.TrimSpace(hub.Module)) if moduleKey == "" { - moduleKey = hubmodule.DefaultModuleKey() + if _, ok := hubmodule.Resolve(normalizedType); ok && normalizedType != "" { + moduleKey = normalizedType + } else { + moduleKey = hubmodule.DefaultModuleKey() + } } if _, ok := hubmodule.Resolve(moduleKey); !ok { return newFieldError(hubField(hub.Name, "Module"), fmt.Sprintf("未注册模块: %s", moduleKey)) diff --git a/internal/hubmodule/README.md b/internal/hubmodule/README.md index ce31190..69ed087 100644 --- a/internal/hubmodule/README.md +++ b/internal/hubmodule/README.md @@ -24,7 +24,7 @@ internal/hubmodule/ 2. 填写模块特有逻辑与缓存策略,并确保包含中文注释解释设计。 3. 在模块目录添加 `module_test.go`,使用 `httptest.Server` 与 `t.TempDir()` 复现真实流量。 4. 运行 `make modules-test` 验证模块单元测试。 -5. 更新 `config.toml` 中对应 `[[Hub]].Module` 字段,验证集成测试后再提交。 +5. `[[Hub]].Module` 留空时会优先选择与 `Type` 同名的模块,实际迁移时仍建议显式填写,便于 diagnostics 标记 rollout。 ## 术语 - **Module Key**:模块唯一标识(如 `legacy`、`npm-tarball`)。 diff --git a/internal/proxy/composer_rewrite.go b/internal/hubmodule/composer/hooks.go similarity index 54% rename from internal/proxy/composer_rewrite.go rename to internal/hubmodule/composer/hooks.go index db35b56..83fdc01 100644 --- a/internal/proxy/composer_rewrite.go +++ b/internal/hubmodule/composer/hooks.go @@ -1,77 +1,136 @@ -package proxy +package composer import ( - "bytes" "encoding/json" - "fmt" - "io" - "net/http" "net/url" - "strconv" "strings" - "github.com/any-hub/any-hub/internal/server" + "github.com/any-hub/any-hub/internal/proxy/hooks" ) -func (h *Handler) rewriteComposerResponse(route *server.HubRoute, resp *http.Response, path string) (*http.Response, error) { - if resp == nil || route == nil || route.Config.Type != "composer" { - return resp, nil - } - if path == "/packages.json" { - return rewriteComposerRoot(resp, route.Config.Domain) - } - if !isComposerMetadataPath(path) { - return resp, nil - } - body, err := io.ReadAll(resp.Body) - if err != nil { - return resp, err - } - resp.Body.Close() - - rewritten, changed, err := rewriteComposerMetadata(body, route.Config.Domain) - if err != nil { - resp.Body = io.NopCloser(bytes.NewReader(body)) - return resp, err - } - if !changed { - resp.Body = io.NopCloser(bytes.NewReader(body)) - return resp, nil - } - - resp.Body = io.NopCloser(bytes.NewReader(rewritten)) - resp.ContentLength = int64(len(rewritten)) - resp.Header.Set("Content-Length", strconv.Itoa(len(rewritten))) - resp.Header.Set("Content-Type", "application/json") - resp.Header.Del("Content-Encoding") - resp.Header.Del("Etag") - return resp, nil +func init() { + hooks.MustRegister("composer", hooks.Hooks{ + NormalizePath: normalizePath, + ResolveUpstream: resolveDistUpstream, + RewriteResponse: rewriteResponse, + CachePolicy: cachePolicy, + ContentType: contentType, + }) } -func rewriteComposerRoot(resp *http.Response, domain string) (*http.Response, error) { - body, err := io.ReadAll(resp.Body) - if err != nil { - return resp, err +func normalizePath(_ *hooks.RequestContext, clean string, rawQuery []byte) (string, []byte) { + if isComposerDistPath(clean) { + return clean, nil } - resp.Body.Close() + return clean, rawQuery +} - data, changed, err := rewriteComposerRootBody(body, domain) - if err != nil { - resp.Body = io.NopCloser(bytes.NewReader(body)) - return resp, err +func resolveDistUpstream(_ *hooks.RequestContext, _ string, clean string, rawQuery []byte) string { + if !isComposerDistPath(clean) { + return "" + } + target, ok := parseComposerDistURL(clean, string(rawQuery)) + if !ok { + return "" + } + return target.String() +} + +func rewriteResponse( + ctx *hooks.RequestContext, + status int, + headers map[string]string, + body []byte, + path string, +) (int, map[string]string, []byte, error) { + switch { + case path == "/packages.json": + data, changed, err := rewriteComposerRootBody(body, ctx.Domain) + if err != nil { + return status, headers, body, err + } + if !changed { + return status, headers, body, nil + } + outHeaders := ensureJSONHeaders(headers) + return status, outHeaders, data, nil + case isComposerMetadataPath(path): + data, changed, err := rewriteComposerMetadata(body, ctx.Domain) + if err != nil { + return status, headers, body, err + } + if !changed { + return status, headers, body, nil + } + outHeaders := ensureJSONHeaders(headers) + return status, outHeaders, data, nil + default: + return status, headers, body, nil + } +} + +func ensureJSONHeaders(headers map[string]string) map[string]string { + if headers == nil { + headers = map[string]string{} + } + headers["Content-Type"] = "application/json" + delete(headers, "Content-Encoding") + delete(headers, "Etag") + return headers +} + +func cachePolicy(_ *hooks.RequestContext, locatorPath string, current hooks.CachePolicy) hooks.CachePolicy { + switch { + case isComposerDistPath(locatorPath): + current.AllowCache = true + current.AllowStore = true + current.RequireRevalidate = false + case isComposerMetadataPath(locatorPath): + current.AllowCache = true + current.AllowStore = true + current.RequireRevalidate = true + default: + current.AllowCache = false + current.AllowStore = false + current.RequireRevalidate = false + } + return current +} + +func contentType(_ *hooks.RequestContext, locatorPath string) string { + if isComposerMetadataPath(locatorPath) { + return "application/json" + } + return "" +} + +func rewriteComposerRootBody(body []byte, domain string) ([]byte, bool, error) { + type root struct { + Packages map[string]string `json:"packages"` + } + var payload root + if err := json.Unmarshal(body, &payload); err != nil { + return nil, false, err + } + if len(payload.Packages) == 0 { + return body, false, nil + } + changed := false + for key, value := range payload.Packages { + rewritten := rewriteComposerAbsolute(domain, value) + if rewritten != value { + payload.Packages[key] = rewritten + changed = true + } } if !changed { - resp.Body = io.NopCloser(bytes.NewReader(body)) - return resp, nil + return body, false, nil } - - resp.Body = io.NopCloser(bytes.NewReader(data)) - resp.ContentLength = int64(len(data)) - resp.Header.Set("Content-Length", strconv.Itoa(len(data))) - resp.Header.Set("Content-Type", "application/json") - resp.Header.Del("Content-Encoding") - resp.Header.Del("Etag") - return resp, nil + data, err := json.Marshal(payload) + if err != nil { + return nil, false, err + } + return data, true, nil } func rewriteComposerMetadata(body []byte, domain string) ([]byte, bool, error) { @@ -183,7 +242,7 @@ func rewriteComposerDistURL(domain, original string) string { if err != nil || parsed.Scheme == "" || parsed.Host == "" { return original } - prefix := fmt.Sprintf("/dist/%s/%s", parsed.Scheme, parsed.Host) + prefix := "/dist/" + parsed.Scheme + "/" + parsed.Host newURL := url.URL{ Scheme: "https", Host: domain, @@ -197,6 +256,29 @@ func rewriteComposerDistURL(domain, original string) string { return newURL.String() } +func rewriteComposerAbsolute(domain, raw string) string { + if raw == "" { + return raw + } + if strings.HasPrefix(raw, "//") { + return "https://" + domain + strings.TrimPrefix(raw, "//") + } + if strings.HasPrefix(raw, "http://") || strings.HasPrefix(raw, "https://") { + parsed, err := url.Parse(raw) + if err != nil { + return raw + } + parsed.Host = domain + parsed.Scheme = "https" + return parsed.String() + } + pathVal := raw + if !strings.HasPrefix(pathVal, "/") { + pathVal = "/" + pathVal + } + return "https://" + domain + pathVal +} + func isComposerMetadataPath(path string) bool { switch { case path == "/packages.json": @@ -218,74 +300,34 @@ func isComposerDistPath(path string) bool { return strings.HasPrefix(path, "/dist/") } -func rewriteComposerAbsolute(domain, raw string) string { - if raw == "" { - return raw +func parseComposerDistURL(path string, rawQuery string) (*url.URL, bool) { + if !strings.HasPrefix(path, "/dist/") { + return nil, false } - if strings.HasPrefix(raw, "//") { - return "https://" + domain + strings.TrimPrefix(raw, "//") + trimmed := strings.TrimPrefix(path, "/dist/") + parts := strings.SplitN(trimmed, "/", 3) + if len(parts) < 3 { + return nil, false } - if strings.HasPrefix(raw, "http://") || strings.HasPrefix(raw, "https://") { - parsed, err := url.Parse(raw) - if err != nil { - return raw - } - parsed.Host = domain - parsed.Scheme = "https" - return parsed.String() + scheme := parts[0] + host := parts[1] + rest := parts[2] + if scheme == "" || host == "" { + return nil, false } - pathVal := raw - if !strings.HasPrefix(pathVal, "/") { - pathVal = "/" + pathVal + if rest == "" { + rest = "/" + } else { + rest = "/" + rest } - return fmt.Sprintf("https://%s%s", domain, pathVal) -} - -func rewriteComposerRootBody(body []byte, domain string) ([]byte, bool, error) { - var root map[string]any - if err := json.Unmarshal(body, &root); err != nil { - return nil, false, err - } - - changed := false - for _, key := range []string{"metadata-url", "providers-api", "providers-url", "notify-batch"} { - if raw, ok := root[key].(string); ok && raw != "" { - newVal := rewriteComposerAbsolute(domain, raw) - if newVal != raw { - root[key] = newVal - changed = true - } - } - } - - if includes, ok := root["provider-includes"].(map[string]any); ok { - for file, hashVal := range includes { - pathVal := file - if rawPath, ok := hashVal.(map[string]any); ok { - if urlValue, ok := rawPath["url"].(string); ok { - pathVal = urlValue - } - } - newPath := rewriteComposerAbsolute(domain, pathVal) - if newPath != pathVal { - changed = true - } - if rawPath, ok := hashVal.(map[string]any); ok { - rawPath["url"] = newPath - includes[file] = rawPath - } else { - includes[file] = newPath - } - } - } - - if !changed { - return body, false, nil - } - - data, err := json.Marshal(root) - if err != nil { - return nil, false, err - } - return data, true, nil + target := &url.URL{ + Scheme: scheme, + Host: host, + Path: rest, + RawPath: rest, + } + if rawQuery != "" { + target.RawQuery = rawQuery + } + return target, true } diff --git a/internal/hubmodule/composer/hooks_test.go b/internal/hubmodule/composer/hooks_test.go new file mode 100644 index 0000000..fd50579 --- /dev/null +++ b/internal/hubmodule/composer/hooks_test.go @@ -0,0 +1,43 @@ +package composer + +import ( + "strings" + "testing" + + "github.com/any-hub/any-hub/internal/proxy/hooks" +) + +func TestNormalizePathDropsDistQuery(t *testing.T) { + path, raw := normalizePath(nil, "/dist/https/example.com/file.zip", []byte("token=1")) + if raw != nil { + t.Fatalf("expected query to be dropped") + } + if path != "/dist/https/example.com/file.zip" { + t.Fatalf("unexpected path %s", path) + } +} + +func TestResolveDistUpstream(t *testing.T) { + url := resolveDistUpstream(nil, "", "/dist/https/example.com/file.zip", []byte("token=1")) + if url != "https://example.com/file.zip?token=1" { + t.Fatalf("unexpected upstream %s", url) + } +} + +func TestRewriteResponseUpdatesURLs(t *testing.T) { + ctx := &hooks.RequestContext{Domain: "cache.example"} + body := []byte(`{"packages":{"a/b":{"1.0.0":{"dist":{"url":"https://pkg.example/dist.zip"}}}}}`) + _, headers, rewritten, err := rewriteResponse(ctx, 200, map[string]string{}, body, "/p2/a/b.json") + if err != nil { + t.Fatalf("rewrite failed: %v", err) + } + if string(rewritten) == string(body) { + t.Fatalf("expected rewrite to modify payload") + } + if headers["Content-Type"] != "application/json" { + t.Fatalf("expected json content type") + } + if !strings.Contains(string(rewritten), "https://cache.example/dist/https/pkg.example/dist.zip") { + t.Fatalf("expected rewritten URL, got %s", string(rewritten)) + } +} diff --git a/internal/hubmodule/docker/hooks.go b/internal/hubmodule/docker/hooks.go new file mode 100644 index 0000000..4012004 --- /dev/null +++ b/internal/hubmodule/docker/hooks.go @@ -0,0 +1,105 @@ +package docker + +import ( + "strings" + + "github.com/any-hub/any-hub/internal/proxy/hooks" +) + +func init() { + hooks.MustRegister("docker", hooks.Hooks{ + NormalizePath: normalizePath, + CachePolicy: cachePolicy, + ContentType: contentType, + }) +} + +func normalizePath(ctx *hooks.RequestContext, clean string, rawQuery []byte) (string, []byte) { + if !isDockerHubHost(ctx.UpstreamHost) { + return clean, rawQuery + } + repo, rest, ok := splitDockerRepoPath(clean) + if !ok || repo == "" || strings.Contains(repo, "/") || repo == "library" { + return clean, rawQuery + } + return "/v2/library/" + repo + rest, rawQuery +} + +func cachePolicy(_ *hooks.RequestContext, locatorPath string, current hooks.CachePolicy) hooks.CachePolicy { + clean := locatorPath + if clean == "/v2" || clean == "v2" || clean == "/v2/" { + return hooks.CachePolicy{} + } + if strings.Contains(clean, "/_catalog") { + return hooks.CachePolicy{} + } + if isDockerImmutablePath(clean) { + current.AllowCache = true + current.AllowStore = true + current.RequireRevalidate = false + return current + } + current.AllowCache = true + current.AllowStore = true + current.RequireRevalidate = true + return current +} + +func contentType(_ *hooks.RequestContext, locatorPath string) string { + switch { + case strings.Contains(locatorPath, "/tags/list"): + return "application/json" + case strings.Contains(locatorPath, "/blobs/"): + return "application/octet-stream" + default: + return "" + } +} + +func isDockerHubHost(host string) bool { + switch strings.ToLower(host) { + case "registry-1.docker.io", "docker.io", "index.docker.io": + return true + default: + return false + } +} + +func splitDockerRepoPath(path string) (string, string, bool) { + if !strings.HasPrefix(path, "/v2/") { + return "", "", false + } + suffix := strings.TrimPrefix(path, "/v2/") + if suffix == "" || suffix == "/" { + return "", "", false + } + segments := strings.Split(suffix, "/") + var repoSegments []string + for i, seg := range segments { + if seg == "" { + return "", "", false + } + switch seg { + case "manifests", "blobs", "tags", "referrers": + if len(repoSegments) == 0 { + return "", "", false + } + rest := "/" + strings.Join(segments[i:], "/") + return strings.Join(repoSegments, "/"), rest, true + case "_catalog": + return "", "", false + } + repoSegments = append(repoSegments, seg) + } + return "", "", false +} + +func isDockerImmutablePath(path string) bool { + if strings.Contains(path, "/blobs/sha256:") { + return true + } + if strings.Contains(path, "/manifests/sha256:") { + return true + } + return false +} diff --git a/internal/hubmodule/docker/hooks_test.go b/internal/hubmodule/docker/hooks_test.go new file mode 100644 index 0000000..53a8b56 --- /dev/null +++ b/internal/hubmodule/docker/hooks_test.go @@ -0,0 +1,31 @@ +package docker + +import ( + "testing" + + "github.com/any-hub/any-hub/internal/proxy/hooks" +) + +func TestNormalizePathAddsLibraryForDockerHub(t *testing.T) { + ctx := &hooks.RequestContext{UpstreamHost: "registry-1.docker.io"} + path, _ := normalizePath(ctx, "/v2/nginx/manifests/latest", nil) + if path != "/v2/library/nginx/manifests/latest" { + t.Fatalf("expected library namespace, got %s", path) + } + + path, _ = normalizePath(ctx, "/v2/library/nginx/manifests/latest", nil) + if path != "/v2/library/nginx/manifests/latest" { + t.Fatalf("unexpected rewrite for existing namespace") + } +} + +func TestSplitDockerRepoPath(t *testing.T) { + repo, rest, ok := splitDockerRepoPath("/v2/library/nginx/manifests/latest") + if !ok || repo != "library/nginx" || rest != "/manifests/latest" { + t.Fatalf("unexpected split result repo=%s rest=%s ok=%v", repo, rest, ok) + } + + if _, _, ok := splitDockerRepoPath("/v2/_catalog"); ok { + t.Fatalf("expected catalog path to be ignored") + } +} diff --git a/internal/hubmodule/golang/hooks.go b/internal/hubmodule/golang/hooks.go new file mode 100644 index 0000000..9ff87ab --- /dev/null +++ b/internal/hubmodule/golang/hooks.go @@ -0,0 +1,27 @@ +package golang + +import "github.com/any-hub/any-hub/internal/proxy/hooks" + +import "strings" + +func init() { + hooks.MustRegister("go", hooks.Hooks{ + CachePolicy: cachePolicy, + }) +} + +func cachePolicy(_ *hooks.RequestContext, locatorPath string, current hooks.CachePolicy) hooks.CachePolicy { + if strings.Contains(locatorPath, "/@v/") && + (strings.HasSuffix(locatorPath, ".zip") || + strings.HasSuffix(locatorPath, ".mod") || + strings.HasSuffix(locatorPath, ".info")) { + current.AllowCache = true + current.AllowStore = true + current.RequireRevalidate = false + return current + } + current.AllowCache = true + current.AllowStore = true + current.RequireRevalidate = true + return current +} diff --git a/internal/hubmodule/golang/hooks_test.go b/internal/hubmodule/golang/hooks_test.go new file mode 100644 index 0000000..7e765c5 --- /dev/null +++ b/internal/hubmodule/golang/hooks_test.go @@ -0,0 +1,19 @@ +package golang + +import ( + "testing" + + "github.com/any-hub/any-hub/internal/proxy/hooks" +) + +func TestCachePolicyForModuleFiles(t *testing.T) { + policy := cachePolicy(nil, "/example/@v/v1.0.0.zip", hooks.CachePolicy{}) + if !policy.AllowCache || policy.RequireRevalidate { + t.Fatalf("expected immutable go artifacts to be cacheable without revalidate") + } + + policy = cachePolicy(nil, "/example/@latest", hooks.CachePolicy{}) + if !policy.RequireRevalidate { + t.Fatalf("expected non-artifacts to require revalidate") + } +} diff --git a/internal/hubmodule/golang/module.go b/internal/hubmodule/golang/module.go new file mode 100644 index 0000000..09f3a3f --- /dev/null +++ b/internal/hubmodule/golang/module.go @@ -0,0 +1,28 @@ +package golang + +import ( + "time" + + "github.com/any-hub/any-hub/internal/hubmodule" +) + +const goDefaultTTL = 30 * time.Minute + +func init() { + hubmodule.MustRegister(hubmodule.ModuleMetadata{ + Key: "go", + Description: "Go module proxy with sumdb/cache defaults", + MigrationState: hubmodule.MigrationStateBeta, + SupportedProtocols: []string{ + "go", + }, + CacheStrategy: hubmodule.CacheStrategyProfile{ + TTLHint: goDefaultTTL, + ValidationMode: hubmodule.ValidationModeLastModified, + DiskLayout: "raw_path", + RequiresMetadataFile: false, + SupportsStreamingWrite: true, + }, + LocatorRewrite: hubmodule.DefaultLocatorRewrite("go"), + }) +} diff --git a/internal/hubmodule/legacy/legacy_module.go b/internal/hubmodule/legacy/legacy_module.go index 2051c1e..49258ac 100644 --- a/internal/hubmodule/legacy/legacy_module.go +++ b/internal/hubmodule/legacy/legacy_module.go @@ -3,7 +3,7 @@ package legacy import "github.com/any-hub/any-hub/internal/hubmodule" -// 模块描述:包装当前共享的代理 + 缓存实现,供未迁移的 Hub 使用。 +// 模块描述:包装当前共享的代理 + 缓存实现,供未迁移的 Hub 使用,并在 diagnostics 中标记为 legacy-only。 func init() { hubmodule.MustRegister(hubmodule.ModuleMetadata{ Key: hubmodule.DefaultModuleKey(), diff --git a/internal/hubmodule/npm/hooks.go b/internal/hubmodule/npm/hooks.go new file mode 100644 index 0000000..6df0ee9 --- /dev/null +++ b/internal/hubmodule/npm/hooks.go @@ -0,0 +1,26 @@ +package npm + +import ( + "strings" + + "github.com/any-hub/any-hub/internal/proxy/hooks" +) + +func init() { + hooks.MustRegister("npm", hooks.Hooks{ + CachePolicy: cachePolicy, + }) +} + +func cachePolicy(_ *hooks.RequestContext, locatorPath string, current hooks.CachePolicy) hooks.CachePolicy { + if strings.Contains(locatorPath, "/-/") && strings.HasSuffix(locatorPath, ".tgz") { + current.AllowCache = true + current.AllowStore = true + current.RequireRevalidate = false + return current + } + current.AllowCache = true + current.AllowStore = true + current.RequireRevalidate = true + return current +} diff --git a/internal/hubmodule/npm/hooks_test.go b/internal/hubmodule/npm/hooks_test.go new file mode 100644 index 0000000..7aae704 --- /dev/null +++ b/internal/hubmodule/npm/hooks_test.go @@ -0,0 +1,22 @@ +package npm + +import ( + "testing" + + "github.com/any-hub/any-hub/internal/proxy/hooks" +) + +func TestCachePolicyForTarball(t *testing.T) { + policy := cachePolicy(nil, "/pkg/-/pkg-1.0.0.tgz", hooks.CachePolicy{}) + if policy.RequireRevalidate { + t.Fatalf("tarball should not require revalidate") + } + if !policy.AllowCache { + t.Fatalf("tarball should allow cache") + } + + policy = cachePolicy(nil, "/pkg", hooks.CachePolicy{}) + if !policy.RequireRevalidate { + t.Fatalf("metadata should require revalidate") + } +} diff --git a/internal/hubmodule/pypi/hooks.go b/internal/hubmodule/pypi/hooks.go new file mode 100644 index 0000000..6b2c827 --- /dev/null +++ b/internal/hubmodule/pypi/hooks.go @@ -0,0 +1,215 @@ +package pypi + +import ( + "bytes" + "encoding/json" + "net/url" + "strings" + + "golang.org/x/net/html" + + "github.com/any-hub/any-hub/internal/proxy/hooks" +) + +func init() { + hooks.MustRegister("pypi", hooks.Hooks{ + NormalizePath: normalizePath, + ResolveUpstream: resolveFilesUpstream, + RewriteResponse: rewriteResponse, + CachePolicy: cachePolicy, + ContentType: contentType, + }) +} + +func normalizePath(_ *hooks.RequestContext, clean string, rawQuery []byte) (string, []byte) { + if strings.HasPrefix(clean, "/files/") || strings.HasPrefix(clean, "/simple/") { + return ensureSimpleTrailingSlash(clean), rawQuery + } + if isDistributionAsset(clean) { + return clean, rawQuery + } + trimmed := strings.Trim(clean, "/") + if trimmed == "" || strings.HasPrefix(trimmed, "_") { + return clean, rawQuery + } + if !strings.HasSuffix(trimmed, "/") { + trimmed += "/" + } + return "/simple/" + trimmed, rawQuery +} + +func ensureSimpleTrailingSlash(path string) string { + if !strings.HasPrefix(path, "/simple/") { + return path + } + if strings.HasSuffix(path, "/") { + return path + } + return path + "/" +} + +func resolveFilesUpstream(_ *hooks.RequestContext, baseURL string, clean string, rawQuery []byte) string { + if !strings.HasPrefix(clean, "/files/") { + return "" + } + trimmed := strings.TrimPrefix(clean, "/files/") + parts := strings.SplitN(trimmed, "/", 3) + if len(parts) < 3 { + return "" + } + scheme := parts[0] + host := parts[1] + rest := parts[2] + if scheme == "" || host == "" { + return "" + } + target := url.URL{Scheme: scheme, Host: host, Path: "/" + strings.TrimPrefix(rest, "/")} + if len(rawQuery) > 0 { + target.RawQuery = string(rawQuery) + } + return target.String() +} + +func cachePolicy(_ *hooks.RequestContext, locatorPath string, current hooks.CachePolicy) hooks.CachePolicy { + if isDistributionAsset(locatorPath) { + current.AllowCache = true + current.AllowStore = true + current.RequireRevalidate = false + return current + } + current.RequireRevalidate = true + return current +} + +func contentType(_ *hooks.RequestContext, locatorPath string) string { + if strings.Contains(locatorPath, "/simple/") { + return "text/html" + } + return "" +} + +func rewriteResponse( + ctx *hooks.RequestContext, + status int, + headers map[string]string, + body []byte, + path string, +) (int, map[string]string, []byte, error) { + if !strings.HasPrefix(path, "/simple") && path != "/" { + return status, headers, body, nil + } + domain := ctx.Domain + rewritten, contentType, err := rewritePyPIBody(body, headers["Content-Type"], domain) + if err != nil { + return status, headers, body, err + } + if headers == nil { + headers = map[string]string{} + } + if contentType != "" { + headers["Content-Type"] = contentType + } + delete(headers, "Content-Encoding") + return status, headers, rewritten, nil +} + +func rewritePyPIBody(body []byte, contentType string, domain string) ([]byte, string, error) { + lowerCT := strings.ToLower(contentType) + if strings.Contains(lowerCT, "application/vnd.pypi.simple.v1+json") || strings.HasPrefix(strings.TrimSpace(string(body)), "{") { + data := map[string]interface{}{} + if err := json.Unmarshal(body, &data); err != nil { + return body, contentType, err + } + if files, ok := data["files"].([]interface{}); ok { + for _, entry := range files { + if fileMap, ok := entry.(map[string]interface{}); ok { + if urlValue, ok := fileMap["url"].(string); ok { + fileMap["url"] = rewritePyPIFileURL(domain, urlValue) + } + } + } + } + rewriteBytes, err := json.Marshal(data) + if err != nil { + return body, contentType, err + } + return rewriteBytes, "application/vnd.pypi.simple.v1+json", nil + } + + rewrittenHTML, err := rewritePyPIHTML(body, domain) + if err != nil { + return body, contentType, err + } + return rewrittenHTML, "text/html; charset=utf-8", nil +} + +func rewritePyPIHTML(body []byte, domain string) ([]byte, error) { + node, err := html.Parse(bytes.NewReader(body)) + if err != nil { + return nil, err + } + rewriteHTMLNode(node, domain) + var buf bytes.Buffer + if err := html.Render(&buf, node); err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +func rewriteHTMLNode(n *html.Node, domain string) { + if n.Type == html.ElementNode { + rewriteHTMLAttributes(n, domain) + } + for child := n.FirstChild; child != nil; child = child.NextSibling { + rewriteHTMLNode(child, domain) + } +} + +func rewriteHTMLAttributes(n *html.Node, domain string) { + for i, attr := range n.Attr { + switch attr.Key { + case "href", "data-dist-info-metadata", "data-core-metadata": + if strings.HasPrefix(attr.Val, "http://") || strings.HasPrefix(attr.Val, "https://") { + n.Attr[i].Val = rewritePyPIFileURL(domain, attr.Val) + } + } + } +} + +func rewritePyPIFileURL(domain, original string) string { + parsed, err := url.Parse(original) + if err != nil || parsed.Scheme == "" || parsed.Host == "" { + return original + } + prefix := "/files/" + parsed.Scheme + "/" + parsed.Host + newURL := url.URL{ + Scheme: "https", + Host: domain, + Path: prefix + parsed.Path, + RawQuery: parsed.RawQuery, + Fragment: parsed.Fragment, + } + if raw := parsed.RawPath; raw != "" { + newURL.RawPath = prefix + raw + } + return newURL.String() +} + +func isDistributionAsset(path string) bool { + switch { + case strings.HasSuffix(path, ".whl"): + return true + case strings.HasSuffix(path, ".tar.gz"): + return true + case strings.HasSuffix(path, ".tar.bz2"): + return true + case strings.HasSuffix(path, ".tgz"): + return true + case strings.HasSuffix(path, ".zip"): + return true + case strings.HasSuffix(path, ".egg"): + return true + default: + return false + } +} diff --git a/internal/hubmodule/pypi/hooks_test.go b/internal/hubmodule/pypi/hooks_test.go new file mode 100644 index 0000000..5e3c0e5 --- /dev/null +++ b/internal/hubmodule/pypi/hooks_test.go @@ -0,0 +1,42 @@ +package pypi + +import ( + "strings" + "testing" + + "github.com/any-hub/any-hub/internal/proxy/hooks" +) + +func TestNormalizePathAddsSimplePrefix(t *testing.T) { + ctx := &hooks.RequestContext{HubType: "pypi"} + path, _ := normalizePath(ctx, "/requests", nil) + if path != "/simple/requests/" { + t.Fatalf("expected /simple prefix, got %s", path) + } +} + +func TestResolveFilesUpstream(t *testing.T) { + ctx := &hooks.RequestContext{} + target := resolveFilesUpstream(ctx, "", "/files/https/example.com/pkg.tgz", nil) + if target != "https://example.com/pkg.tgz" { + t.Fatalf("unexpected upstream target: %s", target) + } +} + +func TestRewriteResponseAdjustsLinks(t *testing.T) { + ctx := &hooks.RequestContext{Domain: "cache.example"} + body := []byte(`link`) + _, headers, rewritten, err := rewriteResponse(ctx, 200, map[string]string{"Content-Type": "text/html"}, body, "/simple/requests/") + if err != nil { + t.Fatalf("rewrite failed: %v", err) + } + if string(rewritten) == string(body) { + t.Fatalf("expected rewrite to modify HTML") + } + if headers["Content-Type"] == "" { + t.Fatalf("expected content type to be set") + } + if !strings.Contains(string(rewritten), "/files/https/files.pythonhosted.org/package.whl") { + t.Fatalf("expected rewritten link, got %s", string(rewritten)) + } +} diff --git a/internal/hubmodule/template/hook_example_test.go b/internal/hubmodule/template/hook_example_test.go new file mode 100644 index 0000000..7bc5985 --- /dev/null +++ b/internal/hubmodule/template/hook_example_test.go @@ -0,0 +1,22 @@ +package template + +import ( + "testing" + + "github.com/any-hub/any-hub/internal/proxy/hooks" +) + +// This test acts as a usage example for module authors. +func TestExampleHookDefinition(t *testing.T) { + h := hooks.Hooks{ + NormalizePath: func(ctx *hooks.RequestContext, clean string, rawQuery []byte) (string, []byte) { + return clean, rawQuery + }, + CachePolicy: func(ctx *hooks.RequestContext, path string, current hooks.CachePolicy) hooks.CachePolicy { + current.AllowCache = true + current.AllowStore = true + return current + }, + } + _ = h +} diff --git a/internal/hubmodule/template/module.go b/internal/hubmodule/template/module.go index e15377a..4c2c3e7 100644 --- a/internal/hubmodule/template/module.go +++ b/internal/hubmodule/template/module.go @@ -2,11 +2,11 @@ package template import "github.com/any-hub/any-hub/internal/hubmodule" -// + // 使用方式:复制整个目录到 internal/hubmodule// 并替换字段。 // - 将 TemplateModule 重命名为实际模块类型。 -// - 在 init() 中调用 hubmodule.MustRegister,注册新的 ModuleMetadata。 -// - 在模块目录中实现自定义代理/缓存逻辑,然后在 main 中调用 proxy.RegisterModuleHandler。 +// - 在 init() 中调用 hubmodule.MustRegister 注册新的 ModuleMetadata。 +// - 在模块目录中实现自定义 Hook(见 hook_example_test.go 中的示例),然后在 main/init 中调用 hooks.MustRegister + proxy.RegisterModule。 // // 注意:本文件仅示例 metadata 注册写法,不会参与编译。 var _ = hubmodule.ModuleMetadata{} diff --git a/internal/hubmodule/template/module_test.go b/internal/hubmodule/template/module_test.go new file mode 100644 index 0000000..58ee64b --- /dev/null +++ b/internal/hubmodule/template/module_test.go @@ -0,0 +1,73 @@ +package template + +import ( + "net/http" + "testing" + + "github.com/any-hub/any-hub/internal/proxy/hooks" +) + +// This test shows a full hook lifecycle that module authors can copy when creating a new hook. +func TestTemplateHookFlow(t *testing.T) { + baseURL := "https://example.com" + ctx := &hooks.RequestContext{ + HubName: "demo", + ModuleKey: "template", + } + + h := hooks.Hooks{ + NormalizePath: func(_ *hooks.RequestContext, clean string, rawQuery []byte) (string, []byte) { + return "/normalized" + clean, rawQuery + }, + ResolveUpstream: func(_ *hooks.RequestContext, upstream string, clean string, rawQuery []byte) string { + if len(rawQuery) > 0 { + return upstream + clean + "?" + string(rawQuery) + } + return upstream + clean + }, + CachePolicy: func(_ *hooks.RequestContext, path string, current hooks.CachePolicy) hooks.CachePolicy { + current.AllowCache = path != "" + current.AllowStore = true + return current + }, + ContentType: func(_ *hooks.RequestContext, path string) string { + if path == "/normalized/index.json" { + return "application/json" + } + return "" + }, + RewriteResponse: func(_ *hooks.RequestContext, status int, headers map[string]string, body []byte, _ string) (int, map[string]string, []byte, error) { + if headers == nil { + headers = map[string]string{} + } + headers["X-Demo"] = "ok" + return status, headers, body, nil + }, + } + + normalized, _ := h.NormalizePath(ctx, "/index.json", nil) + if normalized != "/normalized/index.json" { + t.Fatalf("expected normalized path, got %s", normalized) + } + u := h.ResolveUpstream(ctx, baseURL, normalized, nil) + if u != baseURL+normalized { + t.Fatalf("expected upstream %s, got %s", baseURL+normalized, u) + } + policy := h.CachePolicy(ctx, normalized, hooks.CachePolicy{}) + if !policy.AllowCache || !policy.AllowStore { + t.Fatalf("expected policy to allow cache/store, got %#v", policy) + } + status, headers, body, err := h.RewriteResponse(ctx, http.StatusOK, map[string]string{}, []byte("ok"), normalized) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if headers["X-Demo"] != "ok" { + t.Fatalf("expected rewrite to set header, got %s", headers["X-Demo"]) + } + if status != http.StatusOK || string(body) != "ok" { + t.Fatalf("expected unchanged status/body, got %d/%s", status, string(body)) + } + if ct := h.ContentType(ctx, normalized); ct != "application/json" { + t.Fatalf("expected content type application/json, got %s", ct) + } +} diff --git a/internal/logging/fields.go b/internal/logging/fields.go index e9df691..b73de72 100644 --- a/internal/logging/fields.go +++ b/internal/logging/fields.go @@ -11,13 +11,14 @@ func BaseFields(action, configPath string) logrus.Fields { } // RequestFields 提供 hub/domain/命中状态字段,供代理请求日志复用。 -func RequestFields(hub, domain, hubType, authMode, moduleKey, rolloutFlag string, cacheHit bool) logrus.Fields { +func RequestFields(hub, domain, hubType, authMode, moduleKey, rolloutFlag string, cacheHit bool, legacyOnly bool) logrus.Fields { return logrus.Fields{ "hub": hub, "domain": domain, "hub_type": hubType, "auth_mode": authMode, "cache_hit": cacheHit, + "legacy_only": legacyOnly, "module_key": moduleKey, "rollout_flag": rolloutFlag, } diff --git a/internal/proxy/docker_path_test.go b/internal/proxy/docker_path_test.go deleted file mode 100644 index 37da3a8..0000000 --- a/internal/proxy/docker_path_test.go +++ /dev/null @@ -1,68 +0,0 @@ -package proxy - -import ( - "net/url" - "testing" - - "github.com/any-hub/any-hub/internal/config" - "github.com/any-hub/any-hub/internal/server" -) - -func TestApplyDockerHubNamespaceFallback(t *testing.T) { - route := dockerHubRoute(t, "https://registry-1.docker.io") - - path, changed := applyDockerHubNamespaceFallback(route, "/v2/nginx/manifests/latest") - if !changed { - t.Fatalf("expected fallback to apply") - } - if path != "/v2/library/nginx/manifests/latest" { - t.Fatalf("unexpected normalized path: %s", path) - } - - path, changed = applyDockerHubNamespaceFallback(route, "/v2/library/nginx/manifests/latest") - if changed { - t.Fatalf("expected no changes for already-namespaced repo") - } - - path, changed = applyDockerHubNamespaceFallback(route, "/v2/rogee/nginx/manifests/latest") - if changed { - t.Fatalf("expected no changes for custom namespace") - } - - path, changed = applyDockerHubNamespaceFallback(route, "/v2/_catalog") - if changed { - t.Fatalf("expected no changes for _catalog endpoint") - } - - otherRoute := dockerHubRoute(t, "https://registry.example.com") - path, changed = applyDockerHubNamespaceFallback(otherRoute, "/v2/nginx/manifests/latest") - if changed || path != "/v2/nginx/manifests/latest" { - t.Fatalf("expected no changes for non-docker-hub upstream") - } -} - -func TestSplitDockerRepoPath(t *testing.T) { - repo, rest, ok := splitDockerRepoPath("/v2/library/nginx/manifests/latest") - if !ok || repo != "library/nginx" || rest != "/manifests/latest" { - t.Fatalf("unexpected split result repo=%s rest=%s ok=%v", repo, rest, ok) - } - - if _, _, ok := splitDockerRepoPath("/v2/_catalog"); ok { - t.Fatalf("expected catalog path to be ignored") - } -} - -func dockerHubRoute(t *testing.T, upstream string) *server.HubRoute { - t.Helper() - parsed, err := url.Parse(upstream) - if err != nil { - t.Fatalf("invalid upstream: %v", err) - } - return &server.HubRoute{ - Config: config.HubConfig{ - Name: "docker", - Type: "docker", - }, - UpstreamURL: parsed, - } -} diff --git a/internal/proxy/forwarder.go b/internal/proxy/forwarder.go index df8798e..e101af6 100644 --- a/internal/proxy/forwarder.go +++ b/internal/proxy/forwarder.go @@ -8,6 +8,7 @@ import ( "github.com/gofiber/fiber/v3" "github.com/sirupsen/logrus" + "github.com/any-hub/any-hub/internal/hubmodule" "github.com/any-hub/any-hub/internal/logging" "github.com/any-hub/any-hub/internal/server" ) @@ -37,39 +38,48 @@ func RegisterModuleHandler(key string, handler server.ProxyHandler) { // Handle 实现 server.ProxyHandler,根据 route.ModuleKey 选择 handler。 func (f *Forwarder) Handle(c fiber.Ctx, route *server.HubRoute) error { + requestID := server.RequestID(c) handler := f.lookup(route) if handler == nil { - return f.respondMissingHandler(c, route) + return f.respondMissingHandler(c, route, requestID) } - return f.invokeHandler(c, route, handler) + return f.invokeHandler(c, route, handler, requestID) } -func (f *Forwarder) respondMissingHandler(c fiber.Ctx, route *server.HubRoute) error { - f.logModuleError(route, "module_handler_missing", nil) +func (f *Forwarder) respondMissingHandler(c fiber.Ctx, route *server.HubRoute, requestID string) error { + f.logModuleError(route, "module_handler_missing", nil, requestID) + setRequestIDHeader(c, requestID) return c.Status(fiber.StatusInternalServerError). JSON(fiber.Map{"error": "module_handler_missing"}) } -func (f *Forwarder) invokeHandler(c fiber.Ctx, route *server.HubRoute, handler server.ProxyHandler) (err error) { +func (f *Forwarder) invokeHandler(c fiber.Ctx, route *server.HubRoute, handler server.ProxyHandler, requestID string) (err error) { defer func() { if r := recover(); r != nil { - err = f.respondHandlerPanic(c, route, r) + err = f.respondHandlerPanic(c, route, r, requestID) } }() return handler.Handle(c, route) } -func (f *Forwarder) respondHandlerPanic(c fiber.Ctx, route *server.HubRoute, recovered interface{}) error { - f.logModuleError(route, "module_handler_panic", fmt.Errorf("panic: %v", recovered)) +func (f *Forwarder) respondHandlerPanic(c fiber.Ctx, route *server.HubRoute, recovered interface{}, requestID string) error { + f.logModuleError(route, "module_handler_panic", fmt.Errorf("panic: %v", recovered), requestID) + setRequestIDHeader(c, requestID) return c.Status(fiber.StatusInternalServerError). JSON(fiber.Map{"error": "module_handler_panic"}) } -func (f *Forwarder) logModuleError(route *server.HubRoute, code string, err error) { +func setRequestIDHeader(c fiber.Ctx, requestID string) { + if requestID != "" { + c.Set("X-Request-ID", requestID) + } +} + +func (f *Forwarder) logModuleError(route *server.HubRoute, code string, err error, requestID string) { if f.logger == nil { return } - fields := f.routeFields(route) + fields := f.routeFields(route, requestID) fields["action"] = "proxy" fields["error"] = code if err != nil { @@ -105,7 +115,7 @@ func normalizeModuleKey(key string) string { return strings.ToLower(strings.TrimSpace(key)) } -func (f *Forwarder) routeFields(route *server.HubRoute) logrus.Fields { +func (f *Forwarder) routeFields(route *server.HubRoute, requestID string) logrus.Fields { if route == nil { return logrus.Fields{ "hub": "", @@ -117,7 +127,7 @@ func (f *Forwarder) routeFields(route *server.HubRoute) logrus.Fields { } } - return logging.RequestFields( + fields := logging.RequestFields( route.Config.Name, route.Config.Domain, route.Config.Type, @@ -125,5 +135,10 @@ func (f *Forwarder) routeFields(route *server.HubRoute) logrus.Fields { route.ModuleKey, string(route.RolloutFlag), false, + route.ModuleKey == hubmodule.DefaultModuleKey(), ) + if requestID != "" { + fields["request_id"] = requestID + } + return fields } diff --git a/internal/proxy/forwarder_test.go b/internal/proxy/forwarder_test.go index b94b757..8008b0b 100644 --- a/internal/proxy/forwarder_test.go +++ b/internal/proxy/forwarder_test.go @@ -14,12 +14,15 @@ import ( "github.com/any-hub/any-hub/internal/server" ) +const requestIDKey = "_anyhub_request_id" + func TestForwarderMissingHandler(t *testing.T) { app := fiber.New() defer app.Shutdown() ctx := app.AcquireCtx(new(fasthttp.RequestCtx)) defer app.ReleaseCtx(ctx) + ctx.Locals(requestIDKey, "missing-req") logger := logrus.New() logBuf := &bytes.Buffer{} @@ -40,6 +43,12 @@ func TestForwarderMissingHandler(t *testing.T) { if !strings.Contains(logBuf.String(), "module_handler_missing") { t.Fatalf("expected log to mention module_handler_missing, got %s", logBuf.String()) } + if got := string(ctx.Response().Header.Peek("X-Request-ID")); got != "missing-req" { + t.Fatalf("expected request id header missing-req, got %s", got) + } + if !strings.Contains(logBuf.String(), "missing-req") { + t.Fatalf("expected log to include request id, got %s", logBuf.String()) + } } func TestForwarderHandlerPanic(t *testing.T) { @@ -58,6 +67,7 @@ func TestForwarderHandlerPanic(t *testing.T) { defer app.Shutdown() ctx := app.AcquireCtx(new(fasthttp.RequestCtx)) defer app.ReleaseCtx(ctx) + ctx.Locals(requestIDKey, "panic-req") logger := logrus.New() logBuf := &bytes.Buffer{} @@ -78,6 +88,12 @@ func TestForwarderHandlerPanic(t *testing.T) { if !strings.Contains(logBuf.String(), "module_handler_panic") { t.Fatalf("expected log to mention module_handler_panic, got %s", logBuf.String()) } + if got := string(ctx.Response().Header.Peek("X-Request-ID")); got != "panic-req" { + t.Fatalf("expected request id header panic-req, got %s", got) + } + if !strings.Contains(logBuf.String(), "panic-req") { + t.Fatalf("expected log to include panic request id, got %s", logBuf.String()) + } } func testRouteWithModule(moduleKey string) *server.HubRoute { diff --git a/internal/proxy/handler.go b/internal/proxy/handler.go index d9f5b7b..400ca56 100644 --- a/internal/proxy/handler.go +++ b/internal/proxy/handler.go @@ -36,6 +36,14 @@ type Handler struct { etags sync.Map // key: hub+path, value: etag/digest string } +type hookState struct { + ctx *hooks.RequestContext + def hooks.Hooks + hasHooks bool + clean string + rawQuery []byte +} + // NewHandler constructs a proxy handler with shared HTTP client/logger/store. func NewHandler(client *http.Client, logger *logrus.Logger, store cache.Store) *Handler { return &Handler{ @@ -45,23 +53,58 @@ func NewHandler(client *http.Client, logger *logrus.Logger, store cache.Store) * } } +func buildHookContext(route *server.HubRoute, c fiber.Ctx) *hooks.RequestContext { + if route == nil { + return &hooks.RequestContext{Method: c.Method()} + } + baseHost := "" + if route.UpstreamURL != nil { + baseHost = route.UpstreamURL.Host + } + return &hooks.RequestContext{ + HubName: route.Config.Name, + Domain: route.Config.Domain, + HubType: route.Config.Type, + ModuleKey: route.ModuleKey, + RolloutFlag: string(route.RolloutFlag), + UpstreamHost: baseHost, + Method: c.Method(), + } +} + +func hasHook(def hooks.Hooks) bool { + return def.NormalizePath != nil || + def.ResolveUpstream != nil || + def.RewriteResponse != nil || + def.CachePolicy != nil || + def.ContentType != nil +} + // Handle 执行缓存查找、条件回源和最终 streaming 逻辑,任何阶段出错都会输出结构化日志。 func (h *Handler) Handle(c fiber.Ctx, route *server.HubRoute) error { started := time.Now() requestID := server.RequestID(c) - reqCtx := newRequestContext(route, c.Method()) - moduleHooks, _ := hooks.For(route.ModuleKey) - locator := buildLocator(route, c, reqCtx, moduleHooks) - policy := determineCachePolicy(route, locator, c.Method(), reqCtx, moduleHooks) - strategyWriter := cache.NewStrategyWriter(h.store, route.CacheStrategy) - - if err := ensureProxyHubType(route); err != nil { - h.logger.WithFields(logrus.Fields{ - "hub": route.Config.Name, - "module_key": route.ModuleKey, - }).WithError(err).Error("hub_type_unsupported") - return h.writeError(c, fiber.StatusNotImplemented, "hub_type_unsupported") + hooksDef, ok := hooks.Fetch(route.ModuleKey) + hookCtx := buildHookContext(route, c) + rawQuery := append([]byte(nil), c.Request().URI().QueryString()...) + cleanPath := normalizeRequestPath(route, string(c.Request().URI().Path())) + if hasHook(hooksDef) && hooksDef.NormalizePath != nil { + newPath, newQuery := hooksDef.NormalizePath(hookCtx, cleanPath, rawQuery) + if newPath != "" { + cleanPath = newPath + } + rawQuery = newQuery } + locator := buildLocator(route, c, cleanPath, rawQuery) + policy := determineCachePolicyWithHook(route, locator, c.Method(), hooksDef, ok, hookCtx) + hookState := hookState{ + ctx: hookCtx, + def: hooksDef, + hasHooks: ok && hasHook(hooksDef), + clean: cleanPath, + rawQuery: rawQuery, + } + strategyWriter := cache.NewStrategyWriter(h.store, route.CacheStrategy) ctx := c.Context() if ctx == nil { @@ -89,7 +132,7 @@ func (h *Handler) Handle(c fiber.Ctx, route *server.HubRoute) error { if strategyWriter.ShouldBypassValidation(cached.Entry) { serve = true } else if strategyWriter.SupportsValidation() { - fresh, err := h.isCacheFresh(c, route, locator, cached.Entry) + fresh, err := h.isCacheFresh(c, route, locator, cached.Entry, &hookState) if err != nil { h.logger.WithError(err). WithFields(logrus.Fields{"hub": route.Config.Name, "module_key": route.ModuleKey}). @@ -104,12 +147,12 @@ func (h *Handler) Handle(c fiber.Ctx, route *server.HubRoute) error { } if serve { defer cached.Reader.Close() - return h.serveCache(c, route, cached, requestID, started) + return h.serveCache(c, route, cached, requestID, started, &hookState) } cached.Reader.Close() } - return h.fetchAndStream(c, route, locator, policy, strategyWriter, requestID, started, ctx, reqCtx, moduleHooks) + return h.fetchAndStream(c, route, locator, policy, strategyWriter, requestID, started, ctx, &hookState) } func (h *Handler) serveCache( @@ -118,6 +161,7 @@ func (h *Handler) serveCache( result *cache.ReadResult, requestID string, started time.Time, + hook *hookState, ) error { var readSeeker io.ReadSeeker switch reader := result.Reader.(type) { @@ -130,44 +174,12 @@ func (h *Handler) serveCache( method := c.Method() - contentType := inferCachedContentType(route, result.Entry.Locator) - if contentType == "" && shouldSniffDockerManifest(route, result.Entry.Locator) { + contentType := resolveContentType(route, result.Entry.Locator, hook) + if contentType == "" && shouldSniffDockerManifest(result.Entry.Locator) { if sniffed := sniffDockerManifestContentType(readSeeker); sniffed != "" { contentType = sniffed } } - if route != nil && route.Config.Type == "composer" && isComposerMetadataPath(stripQueryMarker(result.Entry.Locator.Path)) { - body, err := io.ReadAll(result.Reader) - result.Reader.Close() - if err != nil { - return fiber.NewError(fiber.StatusBadGateway, fmt.Sprintf("read cache failed: %v", err)) - } - rewritten := body - if stripQueryMarker(result.Entry.Locator.Path) == "/packages.json" { - if data, changed, err := rewriteComposerRootBody(body, route.Config.Domain); err == nil && changed { - rewritten = data - } - } else { - if data, changed, err := rewriteComposerMetadata(body, route.Config.Domain); err == nil && changed { - rewritten = data - } - } - - c.Set("Content-Type", "application/json") - c.Set("X-Any-Hub-Upstream", route.UpstreamURL.String()) - c.Set("X-Any-Hub-Cache-Hit", "true") - if requestID != "" { - c.Set("X-Request-ID", requestID) - } - c.Status(fiber.StatusOK) - c.Response().Header.SetContentLength(len(rewritten)) - _, err = c.Response().BodyWriter().Write(rewritten) - h.logResult(route, route.UpstreamURL.String(), requestID, fiber.StatusOK, true, started, err) - if err != nil { - return fiber.NewError(fiber.StatusBadGateway, fmt.Sprintf("read cache failed: %v", err)) - } - return nil - } if contentType != "" { c.Set("Content-Type", contentType) } else { @@ -214,35 +226,27 @@ func (h *Handler) fetchAndStream( requestID string, started time.Time, ctx context.Context, + hook *hookState, ) error { - resp, upstreamURL, err := h.executeRequest(c, route) + resp, upstreamURL, err := h.executeRequest(c, route, hook) if err != nil { h.logResult(route, upstreamURL.String(), requestID, 0, false, started, err) return h.writeError(c, fiber.StatusBadGateway, "upstream_failed") } - resp, upstreamURL, err = h.retryOnAuthFailure(c, route, requestID, started, resp, upstreamURL) + resp, upstreamURL, err = h.retryOnAuthFailure(c, route, requestID, started, resp, upstreamURL, hook) if err != nil { h.logResult(route, upstreamURL.String(), requestID, 0, false, started, err) return h.writeError(c, fiber.StatusBadGateway, "upstream_failed") } - if route.Config.Type == "pypi" { - if rewritten, rewriteErr := h.rewritePyPIResponse(route, resp, requestPath(c)); rewriteErr == nil { + if hook != nil && hook.hasHooks && hook.def.RewriteResponse != nil { + if rewritten, rewriteErr := applyHookRewrite(hook, resp, requestPath(c)); rewriteErr == nil { resp = rewritten } else { h.logger.WithError(rewriteErr).WithFields(logrus.Fields{ - "action": "pypi_rewrite", + "action": "hook_rewrite", "hub": route.Config.Name, - }).Warn("pypi_rewrite_failed") - } - } else if route.Config.Type == "composer" { - if rewritten, rewriteErr := h.rewriteComposerResponse(route, resp, requestPath(c)); rewriteErr == nil { - resp = rewritten - } else { - h.logger.WithError(rewriteErr).WithFields(logrus.Fields{ - "action": "composer_rewrite", - "hub": route.Config.Name, - }).Warn("composer_rewrite_failed") + }).Warn("hook_rewrite_failed") } } defer resp.Body.Close() @@ -252,6 +256,42 @@ func (h *Handler) fetchAndStream( return h.consumeUpstream(c, route, locator, resp, shouldStore, writer, requestID, started, ctx) } +func applyHookRewrite(hook *hookState, resp *http.Response, path string) (*http.Response, error) { + if hook == nil || hook.def.RewriteResponse == nil { + return resp, nil + } + body, err := io.ReadAll(resp.Body) + resp.Body.Close() + if err != nil { + return nil, err + } + headers := make(map[string]string, len(resp.Header)) + for key, values := range resp.Header { + if len(values) > 0 { + headers[key] = values[0] + } + } + status, newHeaders, newBody, rewriteErr := hook.def.RewriteResponse(hook.ctx, resp.StatusCode, headers, body, path) + if rewriteErr != nil { + return nil, rewriteErr + } + if newHeaders == nil { + newHeaders = headers + } + if newBody == nil { + newBody = body + } + cloned := *resp + cloned.StatusCode = status + cloned.Header = make(http.Header, len(newHeaders)) + for key, value := range newHeaders { + cloned.Header.Set(key, value) + } + cloned.Body = io.NopCloser(bytes.NewReader(newBody)) + cloned.ContentLength = int64(len(newBody)) + return &cloned, nil +} + func (h *Handler) consumeUpstream( c fiber.Ctx, route *server.HubRoute, @@ -335,6 +375,7 @@ func (h *Handler) retryOnAuthFailure( started time.Time, resp *http.Response, upstreamURL *url.URL, + hook *hookState, ) (*http.Response, *url.URL, error) { if !shouldRetryAuth(route, resp.StatusCode) { return resp, upstreamURL, nil @@ -354,30 +395,31 @@ func (h *Handler) retryOnAuthFailure( return nil, upstreamURL, err } authHeader := "Bearer " + token - retryResp, retryURL, err := h.executeRequestWithAuth(c, route, authHeader) + retryResp, retryURL, err := h.executeRequestWithAuth(c, route, hook, authHeader) if err != nil { return nil, upstreamURL, err } return retryResp, retryURL, nil } - retryResp, retryURL, err := h.executeRequest(c, route) + retryResp, retryURL, err := h.executeRequest(c, route, hook) if err != nil { return nil, upstreamURL, err } return retryResp, retryURL, nil } -func (h *Handler) executeRequest(c fiber.Ctx, route *server.HubRoute) (*http.Response, *url.URL, error) { - return h.executeRequestWithAuth(c, route, "") +func (h *Handler) executeRequest(c fiber.Ctx, route *server.HubRoute, hook *hookState) (*http.Response, *url.URL, error) { + return h.executeRequestWithAuth(c, route, hook, "") } func (h *Handler) executeRequestWithAuth( c fiber.Ctx, route *server.HubRoute, + hook *hookState, authHeader string, ) (*http.Response, *url.URL, error) { - upstreamURL := resolveUpstreamURL(route, route.UpstreamURL, c) + upstreamURL := resolveUpstreamURL(route, route.UpstreamURL, c, hook) body := bytesReader(c.Body()) req, err := h.buildUpstreamRequest(c, upstreamURL, route, c.Method(), body, authHeader) if err != nil { @@ -469,6 +511,7 @@ func (h *Handler) logResult( route.ModuleKey, string(route.RolloutFlag), cacheHit, + route.ModuleKey == hubmodule.DefaultModuleKey(), ) fields["action"] = "proxy" fields["upstream"] = upstream @@ -506,47 +549,20 @@ func inferCachedContentType(route *server.HubRoute, locator cache.Locator) strin return "application/x-tar" } - if route != nil { - switch route.Config.Type { - case "docker": - if strings.Contains(clean, "/manifests/") { - return "" - } - if strings.Contains(clean, "/tags/list") { - return "application/json" - } - if strings.Contains(clean, "/blobs/") { - return "application/octet-stream" - } - case "npm": - if strings.HasSuffix(clean, ".json") { - return "application/json" - } - case "pypi": - if strings.Contains(clean, "/simple/") { - return "text/html" - } - } - } - return "" } -func buildLocator(route *server.HubRoute, c fiber.Ctx) cache.Locator { - uri := c.Request().URI() - pathVal := string(uri.Path()) - clean := normalizeRequestPath(route, pathVal) - if newPath, ok := applyPyPISimpleFallback(route, clean); ok { - clean = newPath - } - if newPath, ok := applyDockerHubNamespaceFallback(route, clean); ok { - clean = newPath - } - query := uri.QueryString() - if route != nil && route.Config.Type == "composer" && isComposerDistPath(clean) { - // composer dist URLs often embed per-request tokens; ignore query for cache key - query = nil +func resolveContentType(route *server.HubRoute, locator cache.Locator, hook *hookState) string { + if hook != nil && hook.hasHooks && hook.def.ContentType != nil { + if ct := hook.def.ContentType(hook.ctx, stripQueryMarker(locator.Path)); ct != "" { + return ct + } } + return inferCachedContentType(route, locator) +} + +func buildLocator(route *server.HubRoute, c fiber.Ctx, clean string, rawQuery []byte) cache.Locator { + query := rawQuery if len(query) > 0 { sum := sha1.Sum(query) clean = fmt.Sprintf("%s/__qs/%s", clean, hex.EncodeToString(sum[:])) @@ -580,10 +596,7 @@ func stripQueryMarker(p string) string { return p } -func shouldSniffDockerManifest(route *server.HubRoute, locator cache.Locator) bool { - if route == nil || route.Config.Type != "docker" { - return false - } +func shouldSniffDockerManifest(locator cache.Locator) bool { clean := stripQueryMarker(locator.Path) return strings.Contains(clean, "/manifests/") } @@ -631,11 +644,7 @@ func normalizeRequestPath(route *server.HubRoute, raw string) string { if raw == "" { raw = "/" } - hasSlash := strings.HasSuffix(raw, "/") clean := path.Clean("/" + raw) - if route != nil && route.Config.Type == "pypi" && hasSlash && clean != "/" && !strings.HasSuffix(clean, "/") { - clean += "/" - } return clean } @@ -646,42 +655,28 @@ func bytesReader(b []byte) io.Reader { return bytes.NewReader(b) } -func resolveUpstreamURL(route *server.HubRoute, base *url.URL, c fiber.Ctx) *url.URL { +func resolveUpstreamURL(route *server.HubRoute, base *url.URL, c fiber.Ctx, hook *hookState) *url.URL { uri := c.Request().URI() - pathVal := string(uri.Path()) - clean := normalizeRequestPath(route, pathVal) - if newPath, ok := applyPyPISimpleFallback(route, clean); ok { - clean = newPath - } - if newPath, ok := applyDockerHubNamespaceFallback(route, clean); ok { - clean = newPath - } - if route != nil && route.Config.Type == "pypi" && strings.HasPrefix(clean, "/files/") { - trimmed := strings.TrimPrefix(clean, "/files/") - parts := strings.SplitN(trimmed, "/", 3) - if len(parts) >= 3 { - scheme := parts[0] - host := parts[1] - rest := parts[2] - filesBase := &url.URL{Scheme: scheme, Host: host} - if !strings.HasPrefix(rest, "/") { - rest = "/" + rest - } - relative := &url.URL{Path: rest, RawPath: rest} - if query := string(uri.QueryString()); query != "" { - relative.RawQuery = query - } - return filesBase.ResolveReference(relative) + rawQuery := append([]byte(nil), uri.QueryString()...) + clean := normalizeRequestPath(route, string(uri.Path())) + if hook != nil { + if hook.clean != "" { + clean = hook.clean } - } - if route != nil && route.Config.Type == "composer" && strings.HasPrefix(clean, "/dist/") { - if distTarget, ok := parseComposerDistURL(clean, string(uri.QueryString())); ok { - return distTarget + if hook.rawQuery != nil { + rawQuery = hook.rawQuery + } + if hook.hasHooks && hook.def.ResolveUpstream != nil { + if u := hook.def.ResolveUpstream(hook.ctx, base.String(), clean, rawQuery); u != "" { + if parsed, err := url.Parse(u); err == nil { + return parsed + } + } } } relative := &url.URL{Path: clean, RawPath: clean} - if query := string(uri.QueryString()); query != "" { - relative.RawQuery = query + if len(rawQuery) > 0 { + relative.RawQuery = string(rawQuery) } return base.ResolveReference(relative) } @@ -718,85 +713,27 @@ type cachePolicy struct { requireRevalidate bool } +func determineCachePolicyWithHook(route *server.HubRoute, locator cache.Locator, method string, def hooks.Hooks, enabled bool, ctx *hooks.RequestContext) cachePolicy { + base := determineCachePolicy(route, locator, method) + if !enabled || def.CachePolicy == nil { + return base + } + updated := def.CachePolicy(ctx, locator.Path, hooks.CachePolicy{ + AllowCache: base.allowCache, + AllowStore: base.allowStore, + RequireRevalidate: base.requireRevalidate, + }) + base.allowCache = updated.AllowCache + base.allowStore = updated.AllowStore + base.requireRevalidate = updated.RequireRevalidate + return base +} + func determineCachePolicy(route *server.HubRoute, locator cache.Locator, method string) cachePolicy { - if route == nil || method != http.MethodGet { + if method != http.MethodGet { return cachePolicy{} } - policy := cachePolicy{allowCache: true, allowStore: true} - path := stripQueryMarker(locator.Path) - switch route.Config.Type { - case "docker": - if path == "/v2" || path == "v2" || path == "/v2/" { - return cachePolicy{} - } - if strings.Contains(path, "/_catalog") { - return cachePolicy{} - } - if isDockerImmutablePath(path) { - return policy - } - policy.requireRevalidate = true - return policy - case "go": - if strings.Contains(path, "/@v/") && - (strings.HasSuffix(path, ".zip") || strings.HasSuffix(path, ".mod") || strings.HasSuffix(path, ".info")) { - return policy - } - policy.requireRevalidate = true - return policy - case "npm": - if strings.Contains(path, "/-/") && strings.HasSuffix(path, ".tgz") { - return policy - } - policy.requireRevalidate = true - return policy - case "pypi": - if isPyPIDistribution(path) { - return policy - } - policy.requireRevalidate = true - return policy - case "composer": - if isComposerDistPath(path) { - return policy - } - if isComposerMetadataPath(path) { - policy.requireRevalidate = true - return policy - } - return cachePolicy{} - default: - return policy - } -} - -func isDockerImmutablePath(path string) bool { - if strings.Contains(path, "/blobs/sha256:") { - return true - } - if strings.Contains(path, "/manifests/sha256:") { - return true - } - return false -} - -func isPyPIDistribution(path string) bool { - switch { - case strings.HasSuffix(path, ".whl"): - return true - case strings.HasSuffix(path, ".tar.gz"): - return true - case strings.HasSuffix(path, ".tar.bz2"): - return true - case strings.HasSuffix(path, ".tgz"): - return true - case strings.HasSuffix(path, ".zip"): - return true - case strings.HasSuffix(path, ".egg"): - return true - default: - return false - } + return cachePolicy{allowCache: true, allowStore: true} } func isCacheableStatus(status int) bool { @@ -808,13 +745,14 @@ func (h *Handler) isCacheFresh( route *server.HubRoute, locator cache.Locator, entry cache.Entry, + hook *hookState, ) (bool, error) { ctx := c.Context() if ctx == nil { ctx = context.Background() } - upstreamURL := resolveUpstreamURL(route, route.UpstreamURL, c) + upstreamURL := resolveUpstreamURL(route, route.UpstreamURL, c, hook) resp, err := h.revalidateRequest(c, route, upstreamURL, locator, "") if err != nil { return false, err @@ -888,113 +826,6 @@ func extractModTime(header http.Header) time.Time { return time.Now().UTC() } -func applyDockerHubNamespaceFallback(route *server.HubRoute, path string) (string, bool) { - if !isDockerHubRoute(route) { - return path, false - } - repo, rest, ok := splitDockerRepoPath(path) - if !ok || repo == "" { - return path, false - } - if repo == "library" || strings.Contains(repo, "/") { - return path, false - } - normalized := "/v2/library/" + repo + rest - return normalized, true -} - -func isDockerHubRoute(route *server.HubRoute) bool { - if route == nil || route.Config.Type != "docker" || route.UpstreamURL == nil { - return false - } - host := strings.ToLower(route.UpstreamURL.Hostname()) - switch host { - case "registry-1.docker.io", "docker.io", "index.docker.io": - return true - default: - return false - } -} - -func splitDockerRepoPath(path string) (string, string, bool) { - if !strings.HasPrefix(path, "/v2/") { - return "", "", false - } - suffix := strings.TrimPrefix(path, "/v2/") - if suffix == "" || suffix == "/" { - return "", "", false - } - segments := strings.Split(suffix, "/") - var repoSegments []string - for i, seg := range segments { - if seg == "" { - return "", "", false - } - switch seg { - case "manifests", "blobs", "tags", "referrers": - if len(repoSegments) == 0 { - return "", "", false - } - rest := "/" + strings.Join(segments[i:], "/") - return strings.Join(repoSegments, "/"), rest, true - case "_catalog": - return "", "", false - } - repoSegments = append(repoSegments, seg) - } - return "", "", false -} - -func applyPyPISimpleFallback(route *server.HubRoute, path string) (string, bool) { - if route == nil || route.Config.Type != "pypi" { - return path, false - } - if strings.HasPrefix(path, "/simple/") || strings.HasPrefix(path, "/files/") { - return path, false - } - if strings.HasSuffix(path, ".whl") || strings.HasSuffix(path, ".tar.gz") || strings.HasSuffix(path, ".tar.bz2") || - strings.HasSuffix(path, ".zip") { - return path, false - } - trimmed := strings.Trim(path, "/") - if trimmed == "" || strings.HasPrefix(trimmed, "_") { - return path, false - } - return "/simple/" + trimmed + "/", true -} - -func parseComposerDistURL(path string, rawQuery string) (*url.URL, bool) { - if !strings.HasPrefix(path, "/dist/") { - return nil, false - } - trimmed := strings.TrimPrefix(path, "/dist/") - parts := strings.SplitN(trimmed, "/", 3) - if len(parts) < 3 { - return nil, false - } - scheme := parts[0] - host := parts[1] - rest := parts[2] - if scheme == "" || host == "" { - return nil, false - } - if rest == "" { - rest = "/" - } else { - rest = "/" + rest - } - target := &url.URL{ - Scheme: scheme, - Host: host, - Path: rest, - RawPath: rest, - } - if rawQuery != "" { - target.RawQuery = rawQuery - } - return target, true -} - type bearerChallenge struct { Realm string Service string @@ -1130,6 +961,7 @@ func (h *Handler) logAuthRetry(route *server.HubRoute, upstream string, requestI route.ModuleKey, string(route.RolloutFlag), false, + route.ModuleKey == hubmodule.DefaultModuleKey(), ) fields["action"] = "proxy_retry" fields["upstream"] = upstream @@ -1150,6 +982,7 @@ func (h *Handler) logAuthFailure(route *server.HubRoute, upstream string, reques route.ModuleKey, string(route.RolloutFlag), false, + route.ModuleKey == hubmodule.DefaultModuleKey(), ) fields["action"] = "proxy" fields["upstream"] = upstream @@ -1204,20 +1037,3 @@ func normalizeETag(value string) string { } return strings.Trim(value, "\"") } - -func ensureProxyHubType(route *server.HubRoute) error { - switch route.Config.Type { - case "docker": - return nil - case "npm": - return nil - case "go": - return nil - case "pypi": - return nil - case "composer": - return nil - default: - return fmt.Errorf("unsupported hub type: %s", route.Config.Type) - } -} diff --git a/internal/proxy/handler_hook_test.go b/internal/proxy/handler_hook_test.go new file mode 100644 index 0000000..18ed859 --- /dev/null +++ b/internal/proxy/handler_hook_test.go @@ -0,0 +1,80 @@ +package proxy + +import ( + "net/url" + "testing" + + "github.com/gofiber/fiber/v3" + "github.com/valyala/fasthttp" + + "github.com/any-hub/any-hub/internal/cache" + "github.com/any-hub/any-hub/internal/config" + "github.com/any-hub/any-hub/internal/proxy/hooks" + "github.com/any-hub/any-hub/internal/server" +) + +func TestResolveUpstreamPrefersHook(t *testing.T) { + app := fiber.New() + defer app.Shutdown() + + ctx := app.AcquireCtx(new(fasthttp.RequestCtx)) + defer app.ReleaseCtx(ctx) + ctx.Request().SetRequestURI("/original/path?from=req") + + base, _ := url.Parse("https://up.example") + route := &server.HubRoute{ + Config: config.HubConfig{ + Name: "demo", + Type: "custom", + }, + UpstreamURL: base, + } + hook := &hookState{ + ctx: &hooks.RequestContext{}, + def: hooks.Hooks{ + NormalizePath: func(_ *hooks.RequestContext, clean string, rawQuery []byte) (string, []byte) { + return clean, rawQuery + }, + ResolveUpstream: func(_ *hooks.RequestContext, upstream string, clean string, rawQuery []byte) string { + return upstream + "/hooked" + }, + }, + hasHooks: true, + clean: "/ignored", + rawQuery: []byte("ignored=1"), + } + + target := resolveUpstreamURL(route, base, ctx, hook) + if target.String() != "https://up.example/hooked" { + t.Fatalf("expected hook override, got %s", target.String()) + } +} + +func TestCachePolicyHookOverrides(t *testing.T) { + route := &server.HubRoute{ + Config: config.HubConfig{ + Name: "demo", + Type: "npm", + }, + } + locator := cacheLocatorForTest("demo", "/a.tgz") + hook := hooks.Hooks{ + CachePolicy: func(_ *hooks.RequestContext, _ string, current hooks.CachePolicy) hooks.CachePolicy { + current.AllowCache = false + current.RequireRevalidate = false + return current + }, + } + ctx := &hooks.RequestContext{Method: fiber.MethodGet} + policy := determineCachePolicyWithHook(route, locator, fiber.MethodGet, hook, true, ctx) + if policy.allowCache { + t.Fatalf("expected hook to disable cache") + } + if policy.requireRevalidate { + t.Fatalf("expected hook to disable revalidate") + } +} + +func cacheLocatorForTest(hub, path string) cache.Locator { + return cache.Locator{HubName: hub, Path: path} +} diff --git a/internal/proxy/hooks/hooks.go b/internal/proxy/hooks/hooks.go index eeaf9e1..7e578ea 100644 --- a/internal/proxy/hooks/hooks.go +++ b/internal/proxy/hooks/hooks.go @@ -1,12 +1,5 @@ package hooks -import ( - "net/http" - "net/url" - "strings" - "sync" -) - // CachePolicy mirrors the proxy cache policy structure. type CachePolicy struct { AllowCache bool @@ -27,34 +20,9 @@ type RequestContext struct { // Hooks describes customization points for module-specific behavior. type Hooks struct { - NormalizePath func(ctx *RequestContext, cleanPath string) string - ResolveUpstream func(ctx *RequestContext, base *url.URL, cleanPath string, rawQuery []byte) *url.URL - RewriteResponse func(ctx *RequestContext, resp *http.Response, cleanPath string) (*http.Response, error) + NormalizePath func(ctx *RequestContext, cleanPath string, rawQuery []byte) (string, []byte) + ResolveUpstream func(ctx *RequestContext, baseURL string, path string, rawQuery []byte) string + RewriteResponse func(ctx *RequestContext, status int, headers map[string]string, body []byte, path string) (int, map[string]string, []byte, error) CachePolicy func(ctx *RequestContext, locatorPath string, current CachePolicy) CachePolicy ContentType func(ctx *RequestContext, locatorPath string) string } - -var registry sync.Map - -// Register stores hooks for the given module key. -func Register(moduleKey string, hooks Hooks) { - key := strings.ToLower(strings.TrimSpace(moduleKey)) - if key == "" { - return - } - registry.Store(key, hooks) -} - -// For retrieves hooks associated with a module key. -func For(moduleKey string) (Hooks, bool) { - key := strings.ToLower(strings.TrimSpace(moduleKey)) - if key == "" { - return Hooks{}, false - } - if value, ok := registry.Load(key); ok { - if hooks, ok := value.(Hooks); ok { - return hooks, true - } - } - return Hooks{}, false -} diff --git a/internal/proxy/hooks/registry.go b/internal/proxy/hooks/registry.go new file mode 100644 index 0000000..b9b92a4 --- /dev/null +++ b/internal/proxy/hooks/registry.go @@ -0,0 +1,68 @@ +package hooks + +import ( + "errors" + "strings" + "sync" +) + +var registry sync.Map + +// ErrDuplicateHook indicates a module key already has hooks registered. +var ErrDuplicateHook = errors.New("hook already registered") + +// Register stores hooks for the given module key. +func Register(moduleKey string, hooks Hooks) error { + key := normalizeKey(moduleKey) + if key == "" { + return errors.New("module key required") + } + if _, loaded := registry.LoadOrStore(key, hooks); loaded { + return ErrDuplicateHook + } + return nil +} + +// MustRegister panics on registration failure. +func MustRegister(moduleKey string, hooks Hooks) { + if err := Register(moduleKey, hooks); err != nil { + panic(err) + } +} + +// Fetch retrieves hooks associated with a module key. +func Fetch(moduleKey string) (Hooks, bool) { + key := normalizeKey(moduleKey) + if key == "" { + return Hooks{}, false + } + if value, ok := registry.Load(key); ok { + if hooks, ok := value.(Hooks); ok { + return hooks, true + } + } + return Hooks{}, false +} + +// Status returns hook registration status for a module key. +func Status(moduleKey string) string { + if _, ok := Fetch(moduleKey); ok { + return "registered" + } + return "missing" +} + +// Snapshot returns status for a list of module keys. +func Snapshot(keys []string) map[string]string { + out := make(map[string]string, len(keys)) + for _, key := range keys { + if normalized := normalizeKey(key); normalized != "" { + out[normalized] = Status(normalized) + } + } + return out +} + +func normalizeKey(key string) string { + return strings.ToLower(strings.TrimSpace(key)) +} diff --git a/internal/proxy/hooks/registry_test.go b/internal/proxy/hooks/registry_test.go new file mode 100644 index 0000000..acc1abf --- /dev/null +++ b/internal/proxy/hooks/registry_test.go @@ -0,0 +1,45 @@ +package hooks + +import ( + "sync" + "testing" +) + +func TestRegisterAndFetch(t *testing.T) { + registry = sync.Map{} + h := Hooks{ContentType: func(*RequestContext, string) string { return "ok" }} + if err := Register("test", h); err != nil { + t.Fatalf("register failed: %v", err) + } + if _, ok := Fetch("test"); !ok { + t.Fatalf("expected fetch ok") + } + if Status("test") != "registered" { + t.Fatalf("expected registered status") + } + if Status("missing") != "missing" { + t.Fatalf("expected missing status") + } +} + +func TestRegisterDuplicate(t *testing.T) { + registry = sync.Map{} + if err := Register("dup", Hooks{}); err != nil { + t.Fatalf("first register failed: %v", err) + } + if err := Register("dup", Hooks{}); err != ErrDuplicateHook { + t.Fatalf("expected ErrDuplicateHook, got %v", err) + } +} + +func TestSnapshot(t *testing.T) { + registry = sync.Map{} + _ = Register("a", Hooks{}) + snap := Snapshot([]string{"a", "b"}) + if snap["a"] != "registered" { + t.Fatalf("expected a registered, got %s", snap["a"]) + } + if snap["b"] != "missing" { + t.Fatalf("expected b missing, got %s", snap["b"]) + } +} diff --git a/internal/proxy/pypi_rewrite.go b/internal/proxy/pypi_rewrite.go deleted file mode 100644 index 47c6456..0000000 --- a/internal/proxy/pypi_rewrite.go +++ /dev/null @@ -1,126 +0,0 @@ -package proxy - -import ( - "bytes" - "encoding/json" - "io" - "net/http" - "net/url" - "strconv" - "strings" - - "golang.org/x/net/html" - - "github.com/any-hub/any-hub/internal/server" -) - -func (h *Handler) rewritePyPIResponse(route *server.HubRoute, resp *http.Response, path string) (*http.Response, error) { - if resp == nil { - return resp, nil - } - if !strings.HasPrefix(path, "/simple") && path != "/" { - return resp, nil - } - bodyBytes, err := io.ReadAll(resp.Body) - if err != nil { - return resp, err - } - resp.Body.Close() - - rewritten, contentType, err := rewritePyPIBody(bodyBytes, resp.Header.Get("Content-Type"), route.Config.Domain) - if err != nil { - resp.Body = io.NopCloser(bytes.NewReader(bodyBytes)) - return resp, err - } - - resp.Body = io.NopCloser(bytes.NewReader(rewritten)) - resp.ContentLength = int64(len(rewritten)) - resp.Header.Set("Content-Length", strconv.Itoa(len(rewritten))) - if contentType != "" { - resp.Header.Set("Content-Type", contentType) - } - resp.Header.Del("Content-Encoding") - return resp, nil -} - -func rewritePyPIBody(body []byte, contentType string, domain string) ([]byte, string, error) { - lowerCT := strings.ToLower(contentType) - if strings.Contains(lowerCT, "application/vnd.pypi.simple.v1+json") || strings.HasPrefix(strings.TrimSpace(string(body)), "{") { - data := map[string]interface{}{} - if err := json.Unmarshal(body, &data); err != nil { - return body, contentType, err - } - if files, ok := data["files"].([]interface{}); ok { - for _, entry := range files { - if fileMap, ok := entry.(map[string]interface{}); ok { - if urlValue, ok := fileMap["url"].(string); ok { - fileMap["url"] = rewritePyPIFileURL(domain, urlValue) - } - } - } - } - rewriteBytes, err := json.Marshal(data) - if err != nil { - return body, contentType, err - } - return rewriteBytes, "application/vnd.pypi.simple.v1+json", nil - } - - rewrittenHTML, err := rewritePyPIHTML(body, domain) - if err != nil { - return body, contentType, err - } - return rewrittenHTML, "text/html; charset=utf-8", nil -} - -func rewritePyPIHTML(body []byte, domain string) ([]byte, error) { - node, err := html.Parse(bytes.NewReader(body)) - if err != nil { - return nil, err - } - rewriteHTMLNode(node, domain) - var buf bytes.Buffer - if err := html.Render(&buf, node); err != nil { - return nil, err - } - return buf.Bytes(), nil -} - -func rewriteHTMLNode(n *html.Node, domain string) { - if n.Type == html.ElementNode { - rewriteHTMLAttributes(n, domain) - } - for child := n.FirstChild; child != nil; child = child.NextSibling { - rewriteHTMLNode(child, domain) - } -} - -func rewriteHTMLAttributes(n *html.Node, domain string) { - for i, attr := range n.Attr { - switch attr.Key { - case "href", "data-dist-info-metadata", "data-core-metadata": - if strings.HasPrefix(attr.Val, "http://") || strings.HasPrefix(attr.Val, "https://") { - n.Attr[i].Val = rewritePyPIFileURL(domain, attr.Val) - } - } - } -} - -func rewritePyPIFileURL(domain, original string) string { - parsed, err := url.Parse(original) - if err != nil || parsed.Scheme == "" || parsed.Host == "" { - return original - } - prefix := "/files/" + parsed.Scheme + "/" + parsed.Host - newURL := url.URL{ - Scheme: "https", - Host: domain, - Path: prefix + parsed.Path, - RawQuery: parsed.RawQuery, - Fragment: parsed.Fragment, - } - if raw := parsed.RawPath; raw != "" { - newURL.RawPath = prefix + raw - } - return newURL.String() -} diff --git a/internal/server/routes/modules.go b/internal/server/routes/modules.go index 198c43c..781a65a 100644 --- a/internal/server/routes/modules.go +++ b/internal/server/routes/modules.go @@ -8,6 +8,7 @@ import ( "github.com/gofiber/fiber/v3" "github.com/any-hub/any-hub/internal/hubmodule" + "github.com/any-hub/any-hub/internal/proxy/hooks" "github.com/any-hub/any-hub/internal/server" ) @@ -18,9 +19,11 @@ func RegisterModuleRoutes(app *fiber.App, registry *server.HubRegistry) { } app.Get("/-/modules", func(c fiber.Ctx) error { + hookStatus := hooks.Snapshot(hubmodule.Keys()) payload := fiber.Map{ - "modules": encodeModules(hubmodule.List()), - "hubs": encodeHubBindings(registry.List()), + "modules": encodeModules(hubmodule.List(), hookStatus), + "hubs": encodeHubBindings(registry.List()), + "hook_registry": hookStatus, } return c.JSON(payload) }) @@ -34,35 +37,39 @@ func RegisterModuleRoutes(app *fiber.App, registry *server.HubRegistry) { if !ok { return c.Status(fiber.StatusNotFound).JSON(fiber.Map{"error": "module_not_found"}) } - return c.JSON(encodeModule(meta)) + encoded := encodeModule(meta) + encoded.HookStatus = hooks.Status(key) + return c.JSON(encoded) }) } type modulePayload struct { - Key string `json:"key"` - Description string `json:"description"` + Key string `json:"key"` + Description string `json:"description"` MigrationState hubmodule.MigrationState `json:"migration_state"` - SupportedProtocols []string `json:"supported_protocols"` - CacheStrategy cacheStrategyPayload `json:"cache_strategy"` + SupportedProtocols []string `json:"supported_protocols"` + CacheStrategy cacheStrategyPayload `json:"cache_strategy"` + HookStatus string `json:"hook_status,omitempty"` } type cacheStrategyPayload struct { - TTLSeconds int64 `json:"ttl_seconds"` - ValidationMode string `json:"validation_mode"` - DiskLayout string `json:"disk_layout"` - RequiresMetadataFile bool `json:"requires_metadata_file"` - SupportsStreamingWrite bool `json:"supports_streaming_write"` + TTLSeconds int64 `json:"ttl_seconds"` + ValidationMode string `json:"validation_mode"` + DiskLayout string `json:"disk_layout"` + RequiresMetadataFile bool `json:"requires_metadata_file"` + SupportsStreamingWrite bool `json:"supports_streaming_write"` } type hubBindingPayload struct { - HubName string `json:"hub_name"` - ModuleKey string `json:"module_key"` - Domain string `json:"domain"` - Port int `json:"port"` - Rollout string `json:"rollout_flag"` + HubName string `json:"hub_name"` + ModuleKey string `json:"module_key"` + Domain string `json:"domain"` + Port int `json:"port"` + Rollout string `json:"rollout_flag"` + Legacy bool `json:"legacy_only"` } -func encodeModules(mods []hubmodule.ModuleMetadata) []modulePayload { +func encodeModules(mods []hubmodule.ModuleMetadata, status map[string]string) []modulePayload { if len(mods) == 0 { return nil } @@ -71,7 +78,11 @@ func encodeModules(mods []hubmodule.ModuleMetadata) []modulePayload { }) result := make([]modulePayload, 0, len(mods)) for _, meta := range mods { - result = append(result, encodeModule(meta)) + item := encodeModule(meta) + if s, ok := status[meta.Key]; ok { + item.HookStatus = s + } + result = append(result, item) } return result } @@ -84,10 +95,10 @@ func encodeModule(meta hubmodule.ModuleMetadata) modulePayload { MigrationState: meta.MigrationState, SupportedProtocols: append([]string(nil), meta.SupportedProtocols...), CacheStrategy: cacheStrategyPayload{ - TTLSeconds: int64(strategy.TTLHint / time.Second), - ValidationMode: string(strategy.ValidationMode), - DiskLayout: strategy.DiskLayout, - RequiresMetadataFile: strategy.RequiresMetadataFile, + TTLSeconds: int64(strategy.TTLHint / time.Second), + ValidationMode: string(strategy.ValidationMode), + DiskLayout: strategy.DiskLayout, + RequiresMetadataFile: strategy.RequiresMetadataFile, SupportsStreamingWrite: strategy.SupportsStreamingWrite, }, } @@ -108,6 +119,7 @@ func encodeHubBindings(routes []server.HubRoute) []hubBindingPayload { Domain: route.Config.Domain, Port: route.ListenPort, Rollout: string(route.RolloutFlag), + Legacy: route.ModuleKey == hubmodule.DefaultModuleKey(), }) } return result diff --git a/internal/server/routes/modules_test.go b/internal/server/routes/modules_test.go new file mode 100644 index 0000000..31c000d --- /dev/null +++ b/internal/server/routes/modules_test.go @@ -0,0 +1,67 @@ +package routes + +import ( + "testing" + "time" + + "github.com/any-hub/any-hub/internal/hubmodule" + "github.com/any-hub/any-hub/internal/proxy/hooks" +) + +func TestEncodeModulesAddsHookStatus(t *testing.T) { + modules := []hubmodule.ModuleMetadata{ + { + Key: "b", + CacheStrategy: hubmodule.CacheStrategyProfile{ + TTLHint: time.Hour, + ValidationMode: hubmodule.ValidationModeNever, + DiskLayout: "flat", + }, + }, + { + Key: "a", + CacheStrategy: hubmodule.CacheStrategyProfile{ + TTLHint: time.Minute, + ValidationMode: hubmodule.ValidationModeNever, + DiskLayout: "flat", + }, + }, + } + status := map[string]string{"a": "registered"} + + encoded := encodeModules(modules, status) + if len(encoded) != 2 { + t.Fatalf("expected 2 modules, got %d", len(encoded)) + } + if encoded[0].Key != "a" { + t.Fatalf("expected sorted module key a first, got %s", encoded[0].Key) + } + if encoded[0].HookStatus != "registered" { + t.Fatalf("expected hook status registered for a, got %s", encoded[0].HookStatus) + } + if encoded[1].Key != "b" { + t.Fatalf("expected second module key b, got %s", encoded[1].Key) + } + if encoded[1].HookStatus != "" { + t.Fatalf("expected empty hook status for b, got %s", encoded[1].HookStatus) + } +} + +func TestEncodeModuleAddsStatusForDetail(t *testing.T) { + key := "module-routes-test" + _ = hooks.Register(key, hooks.Hooks{}) + + meta := hubmodule.ModuleMetadata{ + Key: key, + CacheStrategy: hubmodule.CacheStrategyProfile{ + TTLHint: time.Minute, + ValidationMode: hubmodule.ValidationModeNever, + DiskLayout: "flat", + }, + } + payload := encodeModule(meta) + payload.HookStatus = hooks.Status(key) + if payload.HookStatus != "registered" { + t.Fatalf("expected hook status registered, got %s", payload.HookStatus) + } +} diff --git a/main.go b/main.go index 555b097..74412eb 100644 --- a/main.go +++ b/main.go @@ -14,6 +14,7 @@ import ( "github.com/any-hub/any-hub/internal/hubmodule" "github.com/any-hub/any-hub/internal/logging" "github.com/any-hub/any-hub/internal/proxy" + "github.com/any-hub/any-hub/internal/proxy/hooks" "github.com/any-hub/any-hub/internal/server" "github.com/any-hub/any-hub/internal/server/routes" "github.com/any-hub/any-hub/internal/version" @@ -174,5 +175,13 @@ func registerModuleHandlers(handler server.ProxyHandler) error { return fmt.Errorf("module %s: %w", meta.Key, err) } } + for _, meta := range hubmodule.List() { + if meta.Key == hubmodule.DefaultModuleKey() { + continue + } + if hooks.Status(meta.Key) != "registered" { + return fmt.Errorf("module %s is missing hook registration", meta.Key) + } + } return nil } diff --git a/main_test.go b/main_test.go index feb5743..baa769c 100644 --- a/main_test.go +++ b/main_test.go @@ -2,8 +2,16 @@ package main import ( "bytes" + "fmt" "strings" "testing" + "time" + + "github.com/gofiber/fiber/v3" + + "github.com/any-hub/any-hub/internal/hubmodule" + "github.com/any-hub/any-hub/internal/proxy/hooks" + "github.com/any-hub/any-hub/internal/server" ) func TestParseCLIFlagsPriority(t *testing.T) { @@ -52,3 +60,24 @@ func TestRunVersionOutput(t *testing.T) { t.Fatalf("version 输出应包含 any-hub 标识") } } + +func TestRegisterModuleHandlersFailsWithoutHook(t *testing.T) { + useBufferWriters(t) + key := fmt.Sprintf("missing-hook-%d", time.Now().UnixNano()) + hubmodule.MustRegister(hubmodule.ModuleMetadata{ + Key: key, + Description: "test module without hooks", + MigrationState: hubmodule.MigrationStateBeta, + SupportedProtocols: []string{ + "custom", + }, + }) + defer hooks.MustRegister(key, hooks.Hooks{}) + + err := registerModuleHandlers(server.ProxyHandlerFunc(func(ctx fiber.Ctx, route *server.HubRoute) error { + return nil + })) + if err == nil || !strings.Contains(err.Error(), "missing hook") { + t.Fatalf("expected missing hook error, got %v", err) + } +} diff --git a/specs/006-module-hook-refactor/checklists/requirements.md b/specs/006-module-hook-refactor/checklists/requirements.md new file mode 100644 index 0000000..1271eb9 --- /dev/null +++ b/specs/006-module-hook-refactor/checklists/requirements.md @@ -0,0 +1,34 @@ +# Specification Quality Checklist: Module Hook Refactor + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2025-11-17 +**Feature**: specs/006-module-hook-refactor/spec.md + +## Content Quality + +- [x] No implementation details (languages, frameworks, APIs) +- [x] Focused on user value and business needs +- [x] Written for non-technical stakeholders +- [x] All mandatory sections completed + +## Requirement Completeness + +- [x] No [NEEDS CLARIFICATION] markers remain +- [x] Requirements are testable and unambiguous +- [x] Success criteria are measurable +- [x] Success criteria are technology-agnostic (no implementation details) +- [x] All acceptance scenarios are defined +- [x] Edge cases are identified +- [x] Scope is clearly bounded +- [x] Dependencies and assumptions identified + +## Feature Readiness + +- [x] All functional requirements have clear acceptance criteria +- [x] User scenarios cover primary flows +- [x] Feature meets measurable outcomes defined in Success Criteria +- [x] No implementation details leak into specification + +## Notes + +- Checklist complete; ready for `/speckit.plan`. diff --git a/specs/006-module-hook-refactor/contracts/README.md b/specs/006-module-hook-refactor/contracts/README.md new file mode 100644 index 0000000..f68d3f4 --- /dev/null +++ b/specs/006-module-hook-refactor/contracts/README.md @@ -0,0 +1,16 @@ +# Contracts: Module Hook Refactor + +## `/ - /modules` Diagnostics + +- **Purpose**: 列出所有模块的 metadata 与 Hook 注册状态,SRE 可检查模块是否迁移到 Hook 模式。 +- **Response Additions**: + - `hook_status`: `registered | legacy-only | missing` + - `handler_status`: `ok | missing | panic` +- **Usage**: SRE 通过 `curl http://host:port/-/modules` 观察所有模块状态;缺失 Hook 或 handler 时需在日志与响应中同步体现。 + +## Error Responses + +- `module_handler_missing`: 500 JSON `{ "error": "module_handler_missing" }` +- `module_handler_panic`: 500 JSON `{ "error": "module_handler_panic" }` + +这些错误需出现在日志中并附带 `hub/domain/module_key/request_id`。 diff --git a/specs/006-module-hook-refactor/data-model.md b/specs/006-module-hook-refactor/data-model.md new file mode 100644 index 0000000..3d4ec8a --- /dev/null +++ b/specs/006-module-hook-refactor/data-model.md @@ -0,0 +1,37 @@ +# Data Model: Module Hook Refactor + +## Entities + +- **ModuleHook** + - Attributes: `module_key`, `normalize_path`, `resolve_upstream`, `rewrite_response`, `cache_policy`, `content_type`(函数指针/接口)。 + - Behavior: 由模块在 init() 或启动阶段注册;proxy handler 在请求生命周期调用。 + - Constraints: 所有函数可选;若未实现则 fallback 到通用逻辑;禁止造成路径逃逸或空 handler。 + +- **HookRegistry** + - Attributes: `map[module_key]ModuleHook`、并发安全读写锁。 + - Behavior: 提供 `Register`, `MustRegister`, `Fetch`;在启动时验证唯一性。 + - Constraints: module_key 小写唯一;重复注册报错。 + +- **LegacyHandler** + - Attributes: 使用旧行为的 handler(默认缓存策略、路径重写)。 + - Behavior: 作为默认 handler;Hook 缺失时退回,并在日志/诊断中标记。 + +- **ProxyDispatcher** + - Attributes: handler map(module_key→handler),默认 handler,日志指针。 + - Behavior: lookup handler → 调用并做错误捕获;缺失时返回 `module_handler_missing`。 + +- **Diagnostics Snapshot** + - Attributes: 模块元数据 + Hook 状态(`registered`/`legacy`/`missing`)。 + - Behavior: `/ - /modules` 接口读取 HookRegistry 与 HubRegistry,生成 JSON。 + +## Relationships + +- Hub module 注册时同时在 HookRegistry 与 Forwarder handler map 建立关联。 +- ProxyDispatcher 在请求进入后根据 route.ModuleKey 查询 Hook + handler。 +- Diagnostics 依赖 HookRegistry 与 HubRegistry 联合输出状态。 + +## Lifecycle + +1. 启动:加载 `config.toml` → 初始化 HookRegistry(legacy 默认) → 模块 init() 注册 Hook。 +2. 运行时:请求 → Dispatcher 查找 handler + Hook → 调用 Hook 执行特定逻辑 → 通用缓存/回源流程。 +3. 诊断:`/-/modules` 读取当前 Hook 状态并输出。 diff --git a/specs/006-module-hook-refactor/plan.md b/specs/006-module-hook-refactor/plan.md new file mode 100644 index 0000000..38133ab --- /dev/null +++ b/specs/006-module-hook-refactor/plan.md @@ -0,0 +1,68 @@ +# Implementation Plan: Module Hook Refactor + +**Branch**: `006-module-hook-refactor` | **Date**: 2025-11-17 | **Spec**: specs/006-module-hook-refactor/spec.md +**Input**: Feature specification from `/specs/006-module-hook-refactor/spec.md` + +**Note**: This file captures planning up to Phase 2 (tasks generated separately). + +## Summary + +目标:让每个 hubmodule 完整自管缓存/代理逻辑(路径重写、缓存策略、上游解析、响应重写等),proxy handler 仅负责调度、缓存读写及统一日志/错误包装。技术路线:\n1. 定义 Hook/Handler 契约与注册机制。\n2. 将 Docker/NPM/PyPI/Composer/Go 的特化逻辑迁移到各自 Hook。\n3. legacy handler 仅做兜底;proxy handler 移除所有 `hub_type` 分支并加强错误观测。 + +## Technical Context + +**Language/Version**: Go 1.25+ (静态链接,单二进制交付) +**Primary Dependencies**: Fiber v3(HTTP 服务)、Viper(配置)、Logrus + Lumberjack(结构化日志 & 滚动)、标准库 `net/http`/`io` +**Storage**: 本地文件系统缓存目录 `StoragePath//`(由模块 Hook 定义布局) +**Testing**: `go test ./...`,使用 `httptest`、临时目录与模块 Hook 示例验证缓存/代理路径 +**Target Platform**: Linux/Unix CLI 进程,由 systemd/supervisor 管理,匿名下游客户端 +**Project Type**: 单 Go 项目(`cmd/` 入口 + `internal/*` 包) +**Performance Goals**: 缓存命中直接返回;回源路径需流式转发,单请求常驻内存 <256MB;命中/回源日志可追踪 +**Constraints**: 禁止 Web UI 或账号体系;所有行为受单一 TOML 控制;每个 Hub 需独立 Domain 绑定;仅匿名访问 +**Scale/Scope**: 支撑 Docker/NPM/Go/PyPI/Composer,多仓 Hook 自治,面向弱网及离线缓存复用场景 + +## Constitution Check + +*GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.* + +- Feature 仍然是“轻量多仓 CLI 代理”,未引入 Web UI、账号体系或与代理无关的能力。 +- 仅使用 Go + 宪法指定依赖;任何新第三方库都已在本计划中说明理由与审核结论。 +- 行为完全由 `config.toml` 控制,新增配置项已规划默认值、校验与迁移策略。 +- 方案维持缓存优先 + 流式回源路径,并给出命中/回源/失败的日志与观测手段。 +- 计划内列出了配置解析、缓存读写、Host Header 路由等强制测试与中文注释交付范围。 + +## Project Structure + +### Documentation (this feature) + +```text +specs/[###-feature]/ +├── plan.md # This file (/speckit.plan command output) +├── research.md # Phase 0 output (/speckit.plan command) +├── data-model.md # Phase 1 output (/speckit.plan command) +├── quickstart.md # Phase 1 output (/speckit.plan command) +├── contracts/ # Phase 1 output (/speckit.plan command) +└── tasks.md # Phase 2 output (/speckit.tasks command - NOT created by /speckit.plan) +``` + +### Source Code (repository root) +```text +cmd/any-hub/main.go # CLI 入口、参数解析 +internal/config/ # TOML 加载、默认值、校验 +internal/server/ # Fiber 服务、路由、中间件 +internal/cache/ # 磁盘/内存缓存与 .meta 管理 +internal/proxy/ # 上游访问、缓存策略、流式复制 +configs/ # 示例 config.toml(如需) +tests/ # `go test` 下的单元/集成测试,用临时目录 +``` + +**Structure Decision**: 采用单 Go 项目结构,特性代码应放入上述现有目录;如需新增包或目录,必须解释其与 `internal/*` 的关系并给出后续维护策略。 + +## Complexity Tracking + +> **Fill ONLY if Constitution Check has violations that must be justified** + +| Violation | Why Needed | Simpler Alternative Rejected Because | +|-----------|------------|-------------------------------------| +| [e.g., 4th project] | [current need] | [why 3 projects insufficient] | +| [e.g., Repository pattern] | [specific problem] | [why direct DB access insufficient] | diff --git a/specs/006-module-hook-refactor/quickstart.md b/specs/006-module-hook-refactor/quickstart.md new file mode 100644 index 0000000..6169cb1 --- /dev/null +++ b/specs/006-module-hook-refactor/quickstart.md @@ -0,0 +1,44 @@ +# Quickstart: Module Hook Refactor + +1) 检出分支并安装依赖 +```bash +git checkout 006-module-hook-refactor +/home/rogee/.local/go/bin/go test ./... +``` + +2) 定义模块 Hook +```go +func init() { + proxy.RegisterModule(proxy.ModuleRegistration{ + Key: "npm", + Handler: proxyHandler, + }) + hooks.MustRegister("npm", hooks.Hooks{ + NormalizePath: myNormalize, + ResolveUpstream: myResolve, + RewriteResponse: myRewrite, + CachePolicy: myPolicy, + ContentType: myContentType, + }) +} +``` + +3) 迁移逻辑 +- 读取 `internal/proxy/handler.go` 里的类型分支,对应迁移到模块 Hook。 +- 更新模块单元测试验证缓存、路径、响应重写等行为。 + +4) 验证 +```bash +/home/rogee/.local/go/bin/go test ./... +``` +- 针对迁移模块执行“第一次 miss → 第二次 hit”端到端测试。 +- 触发缺失 handler/panic,确保返回 `module_handler_missing`/`module_handler_panic`。 + +5) 诊断检查 +```bash +curl -s http://localhost:8080/-/modules | jq '.modules[].hook_status' +``` +- 确认新模块标记为 `registered`,未注册模块显示 `missing`,legacy handler 仍可作为兜底。 +- 如果需要查看全局状态,可检查 `hook_registry` 字段,它返回每个 module_key 的注册情况。 +- `hubs[].legacy_only` 为 `true` 时表示该 Hub 仍绑定 legacy 模块;迁移完成后应显式设置 `[[Hub]].Module`。 +- 启动阶段会验证每个模块是否注册 Hook,缺失则直接退出,避免运行期静默回退。 diff --git a/specs/006-module-hook-refactor/research.md b/specs/006-module-hook-refactor/research.md new file mode 100644 index 0000000..309ca22 --- /dev/null +++ b/specs/006-module-hook-refactor/research.md @@ -0,0 +1,23 @@ +# Research: Module Hook Refactor + +## Decisions + +- **Hook Contract Scope**: 模块 Hook 将覆盖 5 个扩展点(路径/locator 重写、上游 URL 解析、响应重写、缓存策略、内容类型推断),并提供统一注册 API。 + - *Rationale*: 这些环节是当前 `handler.go` 中所有类型分支的来源;一次性覆盖可保证 proxy handler 只保留调度/缓存写入。 + - *Alternatives*: 仅重写部分(如响应)会导致残余分支;完全改写为“每模块自己的 ProxyHandler”则重复缓存代码,风险更高。 + +- **Legacy Handler Role**: 保留 legacy handler 作为默认兜底(未迁移模块或外部插件),但日志/诊断会标记为 `legacy-only`。 + - *Rationale*: 确保迁移期间功能可用,同时提示 SRE 识别未迁移模块。 + - *Alternatives*: 强制所有模块一次迁移;风险大且不利于渐进上线。 + +- **Diagnostics Visibility**: `/ - /modules` 输出增加 Hook 状态(已注册/缺失/legacy 默认),并用于 SRE 排查。 + - *Rationale*: 迁移阶段需要监控 Hook 注册情况,避免静默回退。 + - *Alternatives*: 单靠日志搜索,排查效率低。 + +- **Testing Approach**: 每个模块迁移后需要端到端“Miss → Hit”回归,同时新增 Hook 单元测试覆盖缺失/panic 等异常路径。 + - *Rationale*: 确认行为等价且新错误处理生效。 + - *Alternatives*: 仅依赖现有集成测试,不足以验证 Hook 入口。 + +## Clarifications + +- 无须额外澄清;假设各模块团队可修改其包并维护 Hook 实现。 diff --git a/specs/006-module-hook-refactor/spec.md b/specs/006-module-hook-refactor/spec.md new file mode 100644 index 0000000..d3738b1 --- /dev/null +++ b/specs/006-module-hook-refactor/spec.md @@ -0,0 +1,99 @@ +# Feature Specification: Module Hook Refactor + +**Feature Branch**: `006-module-hook-refactor` +**Created**: 2025-11-17 +**Status**: Draft +**Input**: User description: "模块内部自管理缓存/代理逻辑、proxy handler 仅负责调度" + +> 宪法对齐(v1.0.0): +> - 保持 CLI 多仓代理定位,不引入 UI 或账号体系。 +> - 仅依赖 Go 1.25+ 单二进制及 Fiber/Viper/Logrus/Lumberjack/标准库,不新增无关依赖。 +> - 全局 `config.toml` 控制所有行为;若新增配置项需描述字段、默认值、验证与迁移。 +> - 缓存优先 + 流式回源是基础能力;必须定义命中/回源/失败时的结构化日志与观测策略。 +> - 验收需覆盖配置解析、缓存读写、Host Header 绑定及中文注释要求。 + +## User Scenarios & Testing *(mandatory)* + +### User Story 1 - 定义模块 Hook 契约 (Priority: P1) + +作为平台架构师,我可以定义一套模块 Hook/Handler 契约,使缓存键、路径重写、上游解析、响应重写等逻辑都由模块实现,proxy handler 只负责调度与共享的缓存写入。 + +**Why this priority**: 没有统一 Hook,就无法让模块自控逻辑,后续迁移无法落地。 + +**Independent Test**: 提供一个示例模块实现 Hook,注册后能独立覆盖缓存策略/路径重写,proxy handler 不含类型分支仍可处理请求。 + +**Acceptance Scenarios**: + +1. **Given** 定义好 Hook 接口与注册机制,**When** 模块注册自定义 Hook,**Then** proxy handler 在日志/缓存流程中调用模块 Hook,代码中不再出现 `hub_type` 分支。 +2. **Given** 模块缺少 Hook,**When** 注册时,**Then** 启动/测试阶段给出明确错误提示,防止运行期回退到旧逻辑。 + +--- + +### User Story 2 - 迁移现有模块 (Priority: P1) + +作为平台工程师,我希望 Docker/NPM/PyPI/Composer/Go 等模块全部迁移到 Hook 模式,自行管理缓存与代理逻辑,保证行为与改造前一致。 + +**Why this priority**: 生产仓库必须可用,迁移后必须验证命中/回源与日志字段无差异。 + +**Independent Test**: 对每个仓库执行“首次 miss、二次 hit”流程并观察日志字段,与改造前对比一致。 + +**Acceptance Scenarios**: + +1. **Given** Docker 模块已实现 Hook,**When** 请求 manifest/层文件,**Then** 路径重写、缓存键、内容类型判断都由模块完成,proxy handler 不含 Docker 特化。 +2. **Given** PyPI/Composer 模块已迁移,**When** 请求 simple index、packages.json、dist 文件,**Then** 响应重写与内容类型准确,日志字段/缓存命中行为与现状一致。 + +--- + +### User Story 3 - 清理 legacy 逻辑并增强观测 (Priority: P2) + +作为 SRE,我希望 proxy handler 只提供通用调度和错误包装;模块未注册或 Hook panic 时能输出统一 5xx 与结构化日志;legacy 模块成为纯兜底实现。 + +**Why this priority**: 防止运行期静默回退,简化排查路径。 + +**Independent Test**: 刻意注入未注册模块或 Hook panic,观察返回 `module_handler_missing`/`module_handler_panic` 错误及日志字段完整。 + +**Acceptance Scenarios**: + +1. **Given** 模块未注册 Hook,**When** 发起请求,**Then** proxy handler 返回 5xx JSON 并记录 hub/module_key/request_id 等字段。 +2. **Given** Hook panic,**When** 请求执行,**Then** panic 被捕获,返回统一错误并在日志中包含 panic 信息。 + +--- + +### Edge Cases + +- 模块未注册或重复注册时需在启动阶段失败,避免运行期 fallback。 +- Hook 返回非法路径/URL 时需防止逃逸缓存目录或访问非预期上游。 +- 不同模块并行迁移时,需保证 legacy 模块仍可作为默认 handler。 +- Hook 不得影响 diagnostics (`/-/modules`) 或健康检查路径。 +- 模块 Hook 需兼容 HEAD/GET;不支持方法应返回合规状态码。 + +## Requirements *(mandatory)* + +### Functional Requirements + +- **FR-001**: 定义模块 Hook/Handler 契约,覆盖路径重写、缓存策略、上游解析、响应重写、内容类型推断等扩展点,proxy handler 不再含 `hub_type` 分支。 +- **FR-002**: 提供 Hook 注册/验证机制,要求模块在注册时同时提供元数据与 Hook;缺失或重复时启动失败并输出明确日志。 +- **FR-003**: 将 Docker/NPM/PyPI/Composer/Go 模块现有特化逻辑全部迁移到各自 Hook,实现缓存键/TTL/验证、路径 fallback、响应重写的等价行为。 +- **FR-004**: legacy/default 模块作为兜底 handler,确保未迁移模块仍可运行,但会记录“legacy-only”状态,便于观测。 +- **FR-005**: proxy handler 仅负责调度、缓存读写和日志包装;模块 Hook panic 或缺失时返回统一 5xx JSON(`module_handler_panic`/`module_handler_missing`),日志包含 hub/domain/module_key/request_id。 +- **FR-006**: Diagnostics (`/-/modules`) 需展示模块 Hook 注册状态(正常/缺失),并保持现有输出字段。 +- **FR-007**: 文档/quickstart 更新,说明如何实现 Hook、注册模块,以及如何验证新模块缓存/日志。 + +### Key Entities + +- **Module Hook**: 一组可选函数(normalize path、resolve upstream、rewrite response、cache policy、content type)。 +- **Module Registration**: 绑定 module_key、元数据、Hook handler 的机构,负责唯一性与完整性校验。 +- **Proxy Dispatcher**: 使用 module_key→Hook/handler map 调度请求,输出统一日志与错误。 + +### Assumptions + +- 现有模块的缓存策略及接口稳定,可迁移到 Hook 而无需额外外部依赖。 +- 模块团队可接受在各自包内实现 Hook;无需跨团队共享逻辑。 + +## Success Criteria *(mandatory)* + +- **SC-001**: proxy handler 代码中不包含任何 `hub_type`/类型特化分支;静态分析或代码审查确认类型判断被完全移除。 +- **SC-002**: 对 docker/npm/pypi/composer/go 每个仓执行“首次 miss、二次 hit”测试,首/次响应头与日志字段与改造前一致,功能回归通过。 +- **SC-003**: 新增模块仅需在 hubmodule 中实现 Hook 并注册,无需修改 proxy handler;示例模块演示该流程并通过集成测试。 +- **SC-004**: 缺失 handler 或 Hook panic 时返回统一 5xx JSON,日志包含 hub/domain/module_key/request_id,错误率控制在 0(测试场景)。 +- **SC-005**: `/ - /modules` 诊断接口展示所有模块 Hook 状态,SRE 可识别缺失或 legacy-only 模块;与文档描述一致。 diff --git a/specs/006-module-hook-refactor/tasks.md b/specs/006-module-hook-refactor/tasks.md new file mode 100644 index 0000000..a471263 --- /dev/null +++ b/specs/006-module-hook-refactor/tasks.md @@ -0,0 +1,100 @@ +# Tasks: Module Hook Refactor + +**Input**: Design documents from `/specs/006-module-hook-refactor/` +**Prerequisites**: plan.md, spec.md, research.md, data-model.md, contracts/, quickstart.md + +**Tests**: 包含配置解析、缓存读写、代理命中/回源、Host Header 绑定、模块 Hook 行为(缺失/异常)的端到端与单元测试。 + +**Organization**: Tasks are grouped by user story so each delivers independent value. + +## Phase 1: Setup + +- [X] T001 阅读 spec/plan/research,整理 Hook 目标与迁移范围(specs/006-module-hook-refactor/) +- [X] T002 运行现有基线 `GOCACHE=$(pwd)/.cache/go-build /home/rogee/.local/go/bin/go test ./...`,记录失败用例(当前因 handler 尚未实现 Hook 接口导致编译失败) + +--- + +## Phase 2: Foundational + +- [X] T003a 定义 Hook/RequestContext/CachePolicy 接口骨架(internal/proxy/hooks/hooks.go) +- [X] T003b [P] 实现 HookRegistry 注册/查询/重复检测,并暴露 diagnostics 所需状态(internal/proxy/hooks/registry.go, internal/server/routes/modules.go) +- [X] T003c [P] 添加 Hook 契约单元测试(internal/proxy/hooks/hooks_test.go) +- [X] T004 更新 diagnostics 接口,显示注册状态但仍未接入 handler(internal/server/routes/modules.go) +- [X] T005 建立示例模块 Hook(internal/hubmodule/template/, internal/hubmodule/template/module_test.go) +- [X] T006 更新 quickstart/README 说明 Hook 用法(specs/006-module-hook-refactor/quickstart.md, README.md) + +**Checkpoint**: Hook 契约与注册机制 ready。 + +--- + +## Phase 3: User Story 1 - 定义模块 Hook 契约 (Priority: P1) 🎯 MVP + +**Goal**: proxy handler 仅调度 + 缓存读写;Hook 提供各种扩展点。 + +**Independent Test**: 使用示例模块覆盖路径/缓存策略 → proxy handler 中无 `hub_type` 分支也可完成请求。 + +- [X] T007 [US1] 重构 handler,接入 Hook 扩展点(internal/proxy/handler.go) +- [X] T008 [P] [US1] 在 forwarder 中注入 Hook/handler 错误处理(internal/proxy/forwarder.go) +- [X] T009 [US1] 编写 Hook 单元测试覆盖缺失/重复/panic 场景(internal/proxy/hooks/, internal/proxy/forwarder_test.go) +- [X] T010 [US1] 更新 diagnostics `/ - /modules` 输出 Hook 状态(internal/server/routes/modules.go, docs) + +**Checkpoint**: Hook 契约落地并可验证。 + +--- + +## Phase 4: User Story 2 - 迁移现有模块 (Priority: P1) + +**Goal**: Docker/NPM/PyPI/Composer/Go 等模块将特化逻辑迁移到 Hook,行为保持等价。 + +**Independent Test**: 对每个仓执行“第一次 miss、第二次 hit”测试,比对日志与响应头与改造前一致。 + +- [X] T011 [US2] 迁移 Docker Hook(路径 fallback、内容类型、缓存策略等)(internal/hubmodule/docker/) +- [X] T012 [P] [US2] 迁移 npm Hook(包 metadata、tarball 缓存)(internal/hubmodule/npm/) +- [X] T013 [P] [US2] 迁移 PyPI Hook(simple HTML/JSON 重写、files 路径)(internal/hubmodule/pypi/) +- [X] T014 [P] [US2] 迁移 Composer Hook(packages.json/p2 重写、dist URL)(internal/hubmodule/composer/) +- [X] T015 [US2] 迁移 Go Hook(模组路径、sumdb 重写)(internal/hubmodule/go/) +- [X] T016 [US2] 更新 legacy/default handler 说明及行为(internal/hubmodule/legacy/, docs) +- [X] T017 [US2] 为每个模块增加/更新 e2e 测试覆盖 miss/hit 及日志字段(tests/integration/*) + +**Checkpoint**: 现有仓库 Hook 化并通过回归。 + +--- + +## Phase 5: User Story 3 - 清理 legacy 逻辑并增强观测 (Priority: P2) + +**Goal**: proxy handler 统一错误路径;legacy 仅兜底并在诊断输出 legacy-only。 + +**Independent Test**: 模拟缺失 handler 或 Hook panic,返回 `module_handler_missing`/`module_handler_panic`,日志含 hub/domain/module_key/request_id。 + +- [X] T018 [US3] 实现 handler 缺失/重复时启动失败与运行期 5xx 响应(internal/proxy/forwarder.go, internal/hubmodule/registry.go) +- [X] T019 [P] [US3] 添加 Hook panic 捕获与结构化日志(internal/proxy/forwarder.go) +- [X] T020 [US3] 扩展 diagnostics 与日志写入,使 legacy-only 模块可观测(internal/server/routes/modules.go, internal/logging/fields.go) +- [X] T021 [P] [US3] 更新文档/quickstart,描述错误处理与 legacy-only 标记(specs/006-module-hook-refactor/contracts/README.md, quickstart.md) + +**Checkpoint**: Hook 错误与 legacy 观测全面覆盖。 + +--- + +## Phase 6: Polish & Cross-Cutting + +- [X] T022 整理 README/DEVELOPMENT 文档、样例配置,指导如何创建 Hook(README.md, configs/config.example.toml) +- [X] T023 [P] 最终 `gofmt` + `GOCACHE=$(pwd)/.cache/go-build /home/rogee/.local/go/bin/go test ./...`,确保无回归 + +--- + +## Dependencies & Order + +1. Phase1 Setup → Phase2 Hook 契约 → Phase3 (US1) → Phase4 (US2) → Phase5 (US3) → Phase6 polish。 +2. US2 依赖 Hook 契约完成;US3 依赖 US1/US2。 + +## Parallel Execution Examples + +- T012/T013/T014/T015(各模块 Hook)可并行,互不干扰。 +- 文档更新(T016/T021/T022)可与测试任务并行。 +- Hook 契约(T003)完成后,可并行推进 diagnostics (T010) 与 forwarder 错误处理 (T008)。 + +## Implementation Strategy + +- **MVP**:完成 US1(T007-T010)即可让 proxy handler 不再依赖类型分支。 +- **迭代**:依次迁移模块 (US2) 并加强观测 (US3);每阶段运行 `go test ./...`。 +- **验证**:每个模块迁移后执行“Miss→Hit”回归 + 特殊错误场景测试。 diff --git a/tests/integration/hook_logging_test.go b/tests/integration/hook_logging_test.go new file mode 100644 index 0000000..98c238f --- /dev/null +++ b/tests/integration/hook_logging_test.go @@ -0,0 +1,297 @@ +package integration + +import ( + "bytes" + "context" + "io" + "net" + "net/http" + "net/http/httptest" + "strings" + "sync" + "testing" + "time" + + "github.com/gofiber/fiber/v3" + "github.com/sirupsen/logrus" + + "github.com/any-hub/any-hub/internal/cache" + "github.com/any-hub/any-hub/internal/config" + "github.com/any-hub/any-hub/internal/proxy" + "github.com/any-hub/any-hub/internal/server" +) + +func TestDockerHookEmitsLogFields(t *testing.T) { + stub := newCacheFlowStub(t, dockerManifestPath) + defer stub.Close() + + env := newHookTestEnv(t, config.Config{ + Global: config.GlobalConfig{ + ListenPort: 5300, + CacheTTL: config.Duration(time.Minute), + StoragePath: t.TempDir(), + }, + Hubs: []config.HubConfig{ + { + Name: "docker", + Domain: "docker.hook.local", + Type: "docker", + Upstream: stub.URL, + }, + }, + }) + defer env.Close() + + assertCacheMissThenHit(t, env, "docker.hook.local", dockerManifestPath) + env.AssertLogContains(t, `"module_key":"docker"`) + env.AssertLogContains(t, `"cache_hit":false`) + env.AssertLogContains(t, `"cache_hit":true`) +} + +func TestNPMHookEmitsLogFields(t *testing.T) { + stub := newUpstreamStub(t, upstreamNPM) + defer stub.Close() + + env := newHookTestEnv(t, config.Config{ + Global: config.GlobalConfig{ + ListenPort: 5310, + CacheTTL: config.Duration(time.Minute), + StoragePath: t.TempDir(), + }, + Hubs: []config.HubConfig{ + { + Name: "npm", + Domain: "npm.hook.local", + Type: "npm", + Upstream: stub.URL, + }, + }, + }) + defer env.Close() + + assertCacheMissThenHit(t, env, "npm.hook.local", "/lodash") + env.AssertLogContains(t, `"module_key":"npm"`) +} + +func TestPyPIHookEmitsLogFields(t *testing.T) { + stub := newPyPIStub(t) + defer stub.Close() + + env := newHookTestEnv(t, config.Config{ + Global: config.GlobalConfig{ + ListenPort: 5320, + CacheTTL: config.Duration(time.Minute), + StoragePath: t.TempDir(), + }, + Hubs: []config.HubConfig{ + { + Name: "pypi", + Domain: "pypi.hook.local", + Type: "pypi", + Upstream: stub.URL, + }, + }, + }) + defer env.Close() + + assertCacheMissThenHit(t, env, "pypi.hook.local", "/simple/pkg/") + env.AssertLogContains(t, `"module_key":"pypi"`) +} + +func TestComposerHookEmitsLogFields(t *testing.T) { + stub := newComposerStub(t) + defer stub.Close() + + env := newHookTestEnv(t, config.Config{ + Global: config.GlobalConfig{ + ListenPort: 5330, + CacheTTL: config.Duration(time.Minute), + StoragePath: t.TempDir(), + }, + Hubs: []config.HubConfig{ + { + Name: "composer", + Domain: "composer.hook.local", + Type: "composer", + Upstream: stub.URL, + }, + }, + }) + defer env.Close() + + assertCacheMissThenHit(t, env, "composer.hook.local", "/p2/example/package.json") + env.AssertLogContains(t, `"module_key":"composer"`) +} + +func TestGoHookEmitsLogFields(t *testing.T) { + stub := newGoStub(t) + defer stub.Close() + + env := newHookTestEnv(t, config.Config{ + Global: config.GlobalConfig{ + ListenPort: 5340, + CacheTTL: config.Duration(time.Minute), + StoragePath: t.TempDir(), + }, + Hubs: []config.HubConfig{ + { + Name: "gomod", + Domain: "go.hook.local", + Type: "go", + Upstream: stub.URL, + }, + }, + }) + defer env.Close() + + assertCacheMissThenHit(t, env, "go.hook.local", goZipPath) + env.AssertLogContains(t, `"module_key":"go"`) +} + +func assertCacheMissThenHit(t *testing.T, env hookTestEnv, host, path string) { + t.Helper() + resp := env.DoRequest(t, host, path) + if resp.StatusCode != fiber.StatusOK { + body, _ := io.ReadAll(resp.Body) + resp.Body.Close() + t.Fatalf("expected 200 for %s%s, got %d body=%s", host, path, resp.StatusCode, string(body)) + } + if hit := resp.Header.Get("X-Any-Hub-Cache-Hit"); hit != "false" { + resp.Body.Close() + t.Fatalf("expected cache miss header, got %s", hit) + } + resp.Body.Close() + + resp2 := env.DoRequest(t, host, path) + if resp2.Header.Get("X-Any-Hub-Cache-Hit") != "true" { + resp2.Body.Close() + t.Fatalf("expected cache hit header on second request, got %s", resp2.Header.Get("X-Any-Hub-Cache-Hit")) + } + resp2.Body.Close() +} + +type hookTestEnv struct { + app *fiber.App + logs *bytes.Buffer +} + +func newHookTestEnv(t *testing.T, cfg config.Config) hookTestEnv { + t.Helper() + + registry, err := server.NewHubRegistry(&cfg) + if err != nil { + t.Fatalf("registry error: %v", err) + } + + logger := logrus.New() + buf := &bytes.Buffer{} + logger.SetFormatter(&logrus.JSONFormatter{}) + logger.SetOutput(buf) + + store, err := cache.NewStore(cfg.Global.StoragePath) + if err != nil { + t.Fatalf("store error: %v", err) + } + handler := proxy.NewHandler(server.NewUpstreamClient(&cfg), logger, store) + + app, err := server.NewApp(server.AppOptions{ + Logger: logger, + Registry: registry, + Proxy: handler, + ListenPort: cfg.Global.ListenPort, + }) + if err != nil { + t.Fatalf("app error: %v", err) + } + return hookTestEnv{app: app, logs: buf} +} + +func (env hookTestEnv) DoRequest(t *testing.T, host, path string) *http.Response { + t.Helper() + req := httptest.NewRequest(http.MethodGet, "http://"+host+path, nil) + req.Host = host + resp, err := env.app.Test(req) + if err != nil { + t.Fatalf("app.Test error: %v", err) + } + return resp +} + +func (env hookTestEnv) AssertLogContains(t *testing.T, substr string) { + t.Helper() + if !strings.Contains(env.logs.String(), substr) { + t.Fatalf("expected logs to contain %s, got %s", substr, env.logs.String()) + } +} + +func (env hookTestEnv) Close() { + _ = env.app.Shutdown() +} + +const ( + goZipPath = "/mod.example/@v/v1.0.0.zip" + goInfoPath = "/mod.example/@v/v1.0.0.info" +) + +type goStub struct { + server *http.Server + listener net.Listener + URL string + + mu sync.Mutex + hits map[string]int +} + +func newGoStub(t *testing.T) *goStub { + t.Helper() + stub := &goStub{hits: make(map[string]int)} + mux := http.NewServeMux() + mux.HandleFunc(goZipPath, stub.handleZip) + mux.HandleFunc(goInfoPath, stub.handleInfo) + + listener, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Skipf("unable to start go stub: %v", err) + } + server := &http.Server{Handler: mux} + stub.listener = listener + stub.server = server + stub.URL = "http://" + listener.Addr().String() + + go func() { + _ = server.Serve(listener) + }() + return stub +} + +func (s *goStub) handleZip(w http.ResponseWriter, r *http.Request) { + s.record(r.URL.Path) + w.Header().Set("Content-Type", "application/zip") + _, _ = w.Write([]byte("zip-bytes")) +} + +func (s *goStub) handleInfo(w http.ResponseWriter, r *http.Request) { + s.record(r.URL.Path) + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"Version":"v1.0.0"}`)) +} + +func (s *goStub) record(path string) { + s.mu.Lock() + s.hits[path]++ + s.mu.Unlock() +} + +func (s *goStub) Close() { + if s == nil { + return + } + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + if s.server != nil { + _ = s.server.Shutdown(ctx) + } + if s.listener != nil { + _ = s.listener.Close() + } +} diff --git a/tests/integration/module_diagnostics_test.go b/tests/integration/module_diagnostics_test.go index 89b9aa3..e3820ad 100644 --- a/tests/integration/module_diagnostics_test.go +++ b/tests/integration/module_diagnostics_test.go @@ -13,6 +13,7 @@ import ( "github.com/any-hub/any-hub/internal/config" "github.com/any-hub/any-hub/internal/hubmodule" + "github.com/any-hub/any-hub/internal/proxy/hooks" "github.com/any-hub/any-hub/internal/server" "github.com/any-hub/any-hub/internal/server/routes" ) @@ -27,6 +28,7 @@ func TestModuleDiagnosticsEndpoints(t *testing.T) { "npm", }, }) + hooks.MustRegister(moduleKey, hooks.Hooks{}) cfg := &config.Config{ Global: config.GlobalConfig{ @@ -77,6 +79,7 @@ func TestModuleDiagnosticsEndpoints(t *testing.T) { Rollout string `json:"rollout_flag"` Domain string `json:"domain"` Port int `json:"port"` + LegacyOnly bool `json:"legacy_only"` } `json:"hubs"` } body, _ := io.ReadAll(resp.Body) @@ -90,6 +93,9 @@ func TestModuleDiagnosticsEndpoints(t *testing.T) { found := false for _, module := range payload.Modules { if module["key"] == moduleKey { + if module["hook_status"] != "registered" { + t.Fatalf("expected module %s hook_status registered, got %v", moduleKey, module["hook_status"]) + } found = true break } @@ -106,6 +112,9 @@ func TestModuleDiagnosticsEndpoints(t *testing.T) { if hub.ModuleKey != hubmodule.DefaultModuleKey() { t.Fatalf("legacy hub should expose legacy module, got %s", hub.ModuleKey) } + if !hub.LegacyOnly { + t.Fatalf("legacy hub should be marked legacy_only") + } case "modern-hub": if hub.ModuleKey != moduleKey { t.Fatalf("modern hub should expose %s, got %s", moduleKey, hub.ModuleKey) @@ -113,6 +122,9 @@ func TestModuleDiagnosticsEndpoints(t *testing.T) { if hub.Rollout != "dual" { t.Fatalf("modern hub rollout flag should be dual, got %s", hub.Rollout) } + if hub.LegacyOnly { + t.Fatalf("modern hub should not be marked legacy_only") + } default: t.Fatalf("unexpected hub %s", hub.HubName) }