diff --git a/README.md b/README.md index 8accbf0..b981801 100644 --- a/README.md +++ b/README.md @@ -8,14 +8,14 @@ Phase 1 的 HTTP 服务与磁盘缓存能力详见 [`specs/002-fiber-single-prox - 构建 Fiber HTTP 服务 + Host 驱动(共享 `ListenPort`)的 Hub Registry,使 `docker.hub.local`、`npm.hub.local` 等域名在同一端口内路由到独立上游。 - 实现 `StoragePath//` 目录下的磁盘缓存,依靠文件 `mtime` + 上游 `HEAD` 请求完成动态标签的再验证。 -- 提供 Docker/NPM 示例配置、quickstart、测试桩,运行 `go test ./tests/integration` 即可验证代理/缓存流程。 +- 提供 Docker/NPM/PyPI 示例配置、quickstart、测试桩,运行 `go test ./tests/integration` 即可验证代理/缓存流程。 随着 Phase 1 推进,`cmd/any-hub` 将接入 server/cache/quickstart 章节,便于复用 Phase 0 的配置与日志骨架。 ## ListenPort 与凭证迁移指南 1. **全局端口**:在配置全局段声明 `ListenPort = `,所有 Hub 共享该端口;旧的 `[[Hub]].Port` 字段已弃用,`any-hub --check-config` 会在检测到遗留字段时直接失败。 -2. **Hub 类型**:为每个 `[[Hub]]` 添加 `Type = "docker|npm|go"`,驱动日志中的 `hub_type` 字段并预留协议特定行为;非法值会被校验阻断。 +2. **Hub 类型**:为每个 `[[Hub]]` 添加 `Type = "docker|npm|go|pypi"`,驱动日志中的 `hub_type` 字段并预留协议特定行为;非法值会被校验阻断。 3. **可选凭证**:如需突破上游限流,成对提供 `Username`/`Password`。CLI 仅在这两个字段同时出现时注入 Basic Auth,并在日志中输出掩码形式的 `auth_mode=credentialed`。 4. **验证命令**:使用 `any-hub --check-config --config ./config.toml` 快速确认迁移是否完成,成功时日志会显示 `listen_port`、`hub_type` 等字段。 diff --git a/config.example.toml b/config.example.toml index 36c2dc3..9f32f34 100644 --- a/config.example.toml +++ b/config.example.toml @@ -68,4 +68,4 @@ Upstream = "https://pypi.org" Proxy = "" Username = "" Password = "" -Type = "npm" +Type = "pypi" diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 027950e..89f4b85 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -59,6 +59,7 @@ func TestHubTypeValidation(t *testing.T) { {"docker ok", "docker", false}, {"npm ok", "npm", false}, {"go ok", "go", false}, + {"pypi ok", "pypi", false}, {"missing type", "", true}, {"unsupported type", "rubygems", true}, } diff --git a/internal/config/validation.go b/internal/config/validation.go index e76b041..d9b39c0 100644 --- a/internal/config/validation.go +++ b/internal/config/validation.go @@ -12,9 +12,10 @@ var supportedHubTypes = map[string]struct{}{ "docker": {}, "npm": {}, "go": {}, + "pypi": {}, } -const supportedHubTypeList = "docker|npm|go" +const supportedHubTypeList = "docker|npm|go|pypi" // Validate 针对语义级别做进一步校验,防止非法配置启动服务。 func (c *Config) Validate() error { diff --git a/internal/proxy/handler.go b/internal/proxy/handler.go index 59f0348..a98e824 100644 --- a/internal/proxy/handler.go +++ b/internal/proxy/handler.go @@ -345,6 +345,10 @@ func inferCachedContentType(route *server.HubRoute, locator cache.Locator) strin return "application/octet-stream" case strings.HasSuffix(clean, "/@v/list"): return "text/plain" + case strings.HasSuffix(clean, ".whl"): + return "application/octet-stream" + case strings.HasSuffix(clean, ".tar.gz"), strings.HasSuffix(clean, ".tar.bz2"): + return "application/x-tar" } if route != nil { @@ -363,6 +367,10 @@ func inferCachedContentType(route *server.HubRoute, locator cache.Locator) strin if strings.HasSuffix(clean, ".json") { return "application/json" } + case "pypi": + if strings.Contains(clean, "/simple/") { + return "text/html" + } } } @@ -372,10 +380,10 @@ func inferCachedContentType(route *server.HubRoute, locator cache.Locator) strin func buildLocator(route *server.HubRoute, c fiber.Ctx) cache.Locator { uri := c.Request().URI() pathVal := string(uri.Path()) - if pathVal == "" { - pathVal = "/" + clean := normalizeRequestPath(route, pathVal) + if newPath, ok := applyPyPISimpleFallback(route, clean); ok { + clean = newPath } - clean := path.Clean("/" + pathVal) if newPath, ok := applyDockerHubNamespaceFallback(route, clean); ok { clean = newPath } @@ -412,6 +420,18 @@ func requestPath(c fiber.Ctx) string { return pathVal } +func normalizeRequestPath(route *server.HubRoute, raw string) string { + if raw == "" { + raw = "/" + } + hasSlash := strings.HasSuffix(raw, "/") + clean := path.Clean("/" + raw) + if route != nil && route.Config.Type == "pypi" && hasSlash && clean != "/" && !strings.HasSuffix(clean, "/") { + clean += "/" + } + return clean +} + func bytesReader(b []byte) io.Reader { if len(b) == 0 { return http.NoBody @@ -422,11 +442,14 @@ func bytesReader(b []byte) io.Reader { func resolveUpstreamURL(route *server.HubRoute, base *url.URL, c fiber.Ctx) *url.URL { uri := c.Request().URI() pathVal := string(uri.Path()) - relative := &url.URL{Path: pathVal, RawPath: pathVal} - if newPath, ok := applyDockerHubNamespaceFallback(route, relative.Path); ok { - relative.Path = newPath - relative.RawPath = newPath + clean := normalizeRequestPath(route, pathVal) + if newPath, ok := applyPyPISimpleFallback(route, clean); ok { + clean = newPath } + if newPath, ok := applyDockerHubNamespaceFallback(route, clean); ok { + clean = newPath + } + relative := &url.URL{Path: clean, RawPath: clean} if query := string(uri.QueryString()); query != "" { relative.RawQuery = query } @@ -472,10 +495,10 @@ func determineCachePolicy(route *server.HubRoute, locator cache.Locator, method policy := cachePolicy{allowCache: true, allowStore: true} path := stripQueryMarker(locator.Path) switch route.Config.Type { - case "docker": - if path == "/v2" || path == "v2" || path == "/v2/" { - return cachePolicy{} - } +case "docker": + if path == "/v2" || path == "v2" || path == "/v2/" { + return cachePolicy{} + } if strings.Contains(path, "/_catalog") { return cachePolicy{} } @@ -484,21 +507,27 @@ func determineCachePolicy(route *server.HubRoute, locator cache.Locator, method } policy.requireRevalidate = true return policy - case "go": - if strings.Contains(path, "/@v/") && (strings.HasSuffix(path, ".zip") || strings.HasSuffix(path, ".mod") || strings.HasSuffix(path, ".info")) { - return policy - } - policy.requireRevalidate = true - return policy - case "npm": - if strings.Contains(path, "/-/") && strings.HasSuffix(path, ".tgz") { - return policy - } - policy.requireRevalidate = true - return policy - default: +case "go": + if strings.Contains(path, "/@v/") && (strings.HasSuffix(path, ".zip") || strings.HasSuffix(path, ".mod") || strings.HasSuffix(path, ".info")) { return policy } + policy.requireRevalidate = true + return policy +case "npm": + if strings.Contains(path, "/-/") && strings.HasSuffix(path, ".tgz") { + return policy + } + policy.requireRevalidate = true + return policy +case "pypi": + if isPyPIDistribution(path) { + return policy + } + policy.requireRevalidate = true + return policy +default: + return policy +} } func isDockerImmutablePath(path string) bool { @@ -511,6 +540,25 @@ func isDockerImmutablePath(path string) bool { return false } +func isPyPIDistribution(path string) bool { + switch { + case strings.HasSuffix(path, ".whl"): + return true + case strings.HasSuffix(path, ".tar.gz"): + return true + case strings.HasSuffix(path, ".tar.bz2"): + return true + case strings.HasSuffix(path, ".tgz"): + return true + case strings.HasSuffix(path, ".zip"): + return true + case strings.HasSuffix(path, ".egg"): + return true + default: + return false + } +} + func isCacheableStatus(status int) bool { return status == http.StatusOK } @@ -616,6 +664,23 @@ func splitDockerRepoPath(path string) (string, string, bool) { return "", "", false } +func applyPyPISimpleFallback(route *server.HubRoute, path string) (string, bool) { + if route == nil || route.Config.Type != "pypi" { + return path, false + } + if strings.HasPrefix(path, "/simple/") || strings.HasPrefix(path, "/packages/") { + return path, false + } + if strings.HasSuffix(path, ".whl") || strings.HasSuffix(path, ".tar.gz") || strings.HasSuffix(path, ".tar.bz2") || strings.HasSuffix(path, ".zip") { + return path, false + } + trimmed := strings.Trim(path, "/") + if trimmed == "" || strings.HasPrefix(trimmed, "_") { + return path, false + } + return "/simple/" + trimmed + "/", true +} + type bearerChallenge struct { Realm string Service string @@ -766,6 +831,8 @@ func ensureProxyHubType(route *server.HubRoute) error { return nil case "go": return nil + case "pypi": + return nil default: return fmt.Errorf("unsupported hub type: %s", route.Config.Type) } diff --git a/internal/server/router.go b/internal/server/router.go index 1ef7268..1421ced 100644 --- a/internal/server/router.go +++ b/internal/server/router.go @@ -144,6 +144,8 @@ func ensureRouterHubType(route *HubRoute) error { return nil case "go": return nil + case "pypi": + return nil default: return fmt.Errorf("unsupported hub type: %s", route.Config.Type) } diff --git a/tests/integration/pypi_proxy_test.go b/tests/integration/pypi_proxy_test.go new file mode 100644 index 0000000..c5ece8e --- /dev/null +++ b/tests/integration/pypi_proxy_test.go @@ -0,0 +1,241 @@ +package integration + +import ( + "context" + "io" + "net" + "net/http" + "net/http/httptest" + "sync" + "testing" + "time" + + "github.com/gofiber/fiber/v3" + "github.com/sirupsen/logrus" + + "github.com/any-hub/any-hub/internal/cache" + "github.com/any-hub/any-hub/internal/config" + "github.com/any-hub/any-hub/internal/proxy" + "github.com/any-hub/any-hub/internal/server" +) + +func TestPyPICachePolicies(t *testing.T) { + stub := newPyPIStub(t) + defer stub.Close() + + storageDir := t.TempDir() + cfg := &config.Config{ + Global: config.GlobalConfig{ + ListenPort: 5000, + CacheTTL: config.Duration(30 * time.Second), + StoragePath: storageDir, + }, + Hubs: []config.HubConfig{ + { + Name: "pypi", + Domain: "pypi.hub.local", + Type: "pypi", + Upstream: stub.URL, + }, + }, + } + + registry, err := server.NewHubRegistry(cfg) + if err != nil { + t.Fatalf("registry error: %v", err) + } + + logger := logrus.New() + logger.SetOutput(io.Discard) + + store, err := cache.NewStore(storageDir) + if err != nil { + t.Fatalf("store error: %v", err) + } + + handler := proxy.NewHandler(server.NewUpstreamClient(cfg), logger, store) + + app, err := server.NewApp(server.AppOptions{ + Logger: logger, + Registry: registry, + Proxy: handler, + ListenPort: 5000, + }) + if err != nil { + t.Fatalf("app error: %v", err) + } + + doRequest := func(path string) *http.Response { + req := httptest.NewRequest("GET", "http://pypi.hub.local"+path, nil) + req.Host = "pypi.hub.local" + resp, err := app.Test(req) + if err != nil { + t.Fatalf("app.Test error: %v", err) + } + return resp + } + + simplePath := "/simple/pkg/" + resp := doRequest(simplePath) + if resp.StatusCode != fiber.StatusOK { + t.Fatalf("expected 200 for simple index, got %d", resp.StatusCode) + } + if resp.Header.Get("X-Any-Hub-Cache-Hit") != "false" { + t.Fatalf("expected miss for first simple request") + } + resp.Body.Close() + + resp2 := doRequest(simplePath) + if resp2.Header.Get("X-Any-Hub-Cache-Hit") != "true" { + t.Fatalf("expected cached simple response after HEAD revalidation") + } + resp2.Body.Close() + + if stub.simpleHeadHits != 1 { + t.Fatalf("expected single HEAD for simple index, got %d", stub.simpleHeadHits) + } + + stub.UpdateSimple([]byte("updated")) + resp3 := doRequest(simplePath) + if resp3.Header.Get("X-Any-Hub-Cache-Hit") != "false" { + t.Fatalf("expected miss after simple index update") + } + resp3.Body.Close() + + if stub.simpleHits != 2 { + t.Fatalf("expected second GET for updated index, got %d", stub.simpleHits) + } + if stub.simpleHeadHits != 2 { + t.Fatalf("expected second HEAD before refresh, got %d", stub.simpleHeadHits) + } + + wheelPath := "/packages/foo/foo-1.0-py3-none-any.whl" + respWheel := doRequest(wheelPath) + if respWheel.StatusCode != fiber.StatusOK { + t.Fatalf("expected 200 for wheel, got %d", respWheel.StatusCode) + } + if respWheel.Header.Get("X-Any-Hub-Cache-Hit") != "false" { + t.Fatalf("expected miss for first wheel request") + } + respWheel.Body.Close() + + respWheel2 := doRequest(wheelPath) + if respWheel2.Header.Get("X-Any-Hub-Cache-Hit") != "true" { + t.Fatalf("expected cached wheel response without revalidation") + } + respWheel2.Body.Close() + + if stub.wheelHeadHits != 0 { + t.Fatalf("wheel path should not perform HEAD, got %d", stub.wheelHeadHits) + } + + // bare project path should fallback to /simple//. + bareResp := doRequest("/pkg/") + if bareResp.StatusCode != fiber.StatusOK { + body, _ := io.ReadAll(bareResp.Body) + t.Fatalf("expected fallback success for bare path, got %d body=%s", bareResp.StatusCode, string(body)) + } + bareResp.Body.Close() +} + +type pypiStub struct { + server *http.Server + listener net.Listener + URL string + + mu sync.Mutex + simpleHits int + simpleHeadHits int + wheelHits int + wheelHeadHits int + simpleBody []byte + wheelBody []byte + lastSimpleMod string +} + +func newPyPIStub(t *testing.T) *pypiStub { + t.Helper() + stub := &pypiStub{ + simpleBody: []byte("ok"), + wheelBody: []byte("wheel-bytes"), + lastSimpleMod: time.Now().UTC().Format(http.TimeFormat), + } + + mux := http.NewServeMux() + mux.HandleFunc("/simple/pkg/", stub.handleSimple) + mux.HandleFunc("/packages/foo/foo-1.0-py3-none-any.whl", stub.handleWheel) + + listener, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Skipf("unable to start pypi stub: %v", err) + } + + server := &http.Server{Handler: mux} + stub.server = server + stub.listener = listener + stub.URL = "http://" + listener.Addr().String() + + go func() { + _ = server.Serve(listener) + }() + + return stub +} + +func (s *pypiStub) handleSimple(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + if r.Method == http.MethodHead { + s.simpleHeadHits++ + } else { + s.simpleHits++ + } + body := append([]byte(nil), s.simpleBody...) + lastMod := s.lastSimpleMod + s.mu.Unlock() + + w.Header().Set("Content-Type", "text/html") + w.Header().Set("Last-Modified", lastMod) + if r.Method == http.MethodHead { + w.WriteHeader(http.StatusOK) + return + } + w.WriteHeader(http.StatusOK) + _, _ = w.Write(body) +} + +func (s *pypiStub) handleWheel(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + if r.Method == http.MethodHead { + s.wheelHeadHits++ + } else { + s.wheelHits++ + } + body := append([]byte(nil), s.wheelBody...) + s.mu.Unlock() + + w.Header().Set("Content-Type", "application/octet-stream") + if r.Method == http.MethodHead { + w.WriteHeader(http.StatusOK) + return + } + w.WriteHeader(http.StatusOK) + _, _ = w.Write(body) +} + +func (s *pypiStub) UpdateSimple(body []byte) { + s.mu.Lock() + defer s.mu.Unlock() + s.simpleBody = append([]byte(nil), body...) + s.lastSimpleMod = time.Now().UTC().Format(http.TimeFormat) +} + +func (s *pypiStub) Close() { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + if s.server != nil { + _ = s.server.Shutdown(ctx) + } + if s.listener != nil { + _ = s.listener.Close() + } +}