添加对 PyPI 的支持,包括配置示例、缓存策略和集成测试

This commit is contained in:
2025-11-14 13:11:02 +08:00
parent 39ebf61572
commit 9444531e3b
7 changed files with 340 additions and 28 deletions

View File

@@ -8,14 +8,14 @@ Phase 1 的 HTTP 服务与磁盘缓存能力详见 [`specs/002-fiber-single-prox
- 构建 Fiber HTTP 服务 + Host 驱动(共享 `ListenPort`)的 Hub Registry使 `docker.hub.local``npm.hub.local` 等域名在同一端口内路由到独立上游。
- 实现 `StoragePath/<Hub>/<path>` 目录下的磁盘缓存,依靠文件 `mtime` + 上游 `HEAD` 请求完成动态标签的再验证。
- 提供 Docker/NPM 示例配置、quickstart、测试桩运行 `go test ./tests/integration` 即可验证代理/缓存流程。
- 提供 Docker/NPM/PyPI 示例配置、quickstart、测试桩运行 `go test ./tests/integration` 即可验证代理/缓存流程。
随着 Phase 1 推进,`cmd/any-hub` 将接入 server/cache/quickstart 章节,便于复用 Phase 0 的配置与日志骨架。
## ListenPort 与凭证迁移指南
1. **全局端口**:在配置全局段声明 `ListenPort = <port>`,所有 Hub 共享该端口;旧的 `[[Hub]].Port` 字段已弃用,`any-hub --check-config` 会在检测到遗留字段时直接失败。
2. **Hub 类型**:为每个 `[[Hub]]` 添加 `Type = "docker|npm|go"`,驱动日志中的 `hub_type` 字段并预留协议特定行为;非法值会被校验阻断。
2. **Hub 类型**:为每个 `[[Hub]]` 添加 `Type = "docker|npm|go|pypi"`,驱动日志中的 `hub_type` 字段并预留协议特定行为;非法值会被校验阻断。
3. **可选凭证**:如需突破上游限流,成对提供 `Username`/`Password`。CLI 仅在这两个字段同时出现时注入 Basic Auth并在日志中输出掩码形式的 `auth_mode=credentialed`
4. **验证命令**:使用 `any-hub --check-config --config ./config.toml` 快速确认迁移是否完成,成功时日志会显示 `listen_port``hub_type` 等字段。

View File

@@ -68,4 +68,4 @@ Upstream = "https://pypi.org"
Proxy = ""
Username = ""
Password = ""
Type = "npm"
Type = "pypi"

View File

@@ -59,6 +59,7 @@ func TestHubTypeValidation(t *testing.T) {
{"docker ok", "docker", false},
{"npm ok", "npm", false},
{"go ok", "go", false},
{"pypi ok", "pypi", false},
{"missing type", "", true},
{"unsupported type", "rubygems", true},
}

View File

@@ -12,9 +12,10 @@ var supportedHubTypes = map[string]struct{}{
"docker": {},
"npm": {},
"go": {},
"pypi": {},
}
const supportedHubTypeList = "docker|npm|go"
const supportedHubTypeList = "docker|npm|go|pypi"
// Validate 针对语义级别做进一步校验,防止非法配置启动服务。
func (c *Config) Validate() error {

View File

@@ -345,6 +345,10 @@ func inferCachedContentType(route *server.HubRoute, locator cache.Locator) strin
return "application/octet-stream"
case strings.HasSuffix(clean, "/@v/list"):
return "text/plain"
case strings.HasSuffix(clean, ".whl"):
return "application/octet-stream"
case strings.HasSuffix(clean, ".tar.gz"), strings.HasSuffix(clean, ".tar.bz2"):
return "application/x-tar"
}
if route != nil {
@@ -363,6 +367,10 @@ func inferCachedContentType(route *server.HubRoute, locator cache.Locator) strin
if strings.HasSuffix(clean, ".json") {
return "application/json"
}
case "pypi":
if strings.Contains(clean, "/simple/") {
return "text/html"
}
}
}
@@ -372,10 +380,10 @@ func inferCachedContentType(route *server.HubRoute, locator cache.Locator) strin
func buildLocator(route *server.HubRoute, c fiber.Ctx) cache.Locator {
uri := c.Request().URI()
pathVal := string(uri.Path())
if pathVal == "" {
pathVal = "/"
clean := normalizeRequestPath(route, pathVal)
if newPath, ok := applyPyPISimpleFallback(route, clean); ok {
clean = newPath
}
clean := path.Clean("/" + pathVal)
if newPath, ok := applyDockerHubNamespaceFallback(route, clean); ok {
clean = newPath
}
@@ -412,6 +420,18 @@ func requestPath(c fiber.Ctx) string {
return pathVal
}
func normalizeRequestPath(route *server.HubRoute, raw string) string {
if raw == "" {
raw = "/"
}
hasSlash := strings.HasSuffix(raw, "/")
clean := path.Clean("/" + raw)
if route != nil && route.Config.Type == "pypi" && hasSlash && clean != "/" && !strings.HasSuffix(clean, "/") {
clean += "/"
}
return clean
}
func bytesReader(b []byte) io.Reader {
if len(b) == 0 {
return http.NoBody
@@ -422,11 +442,14 @@ func bytesReader(b []byte) io.Reader {
func resolveUpstreamURL(route *server.HubRoute, base *url.URL, c fiber.Ctx) *url.URL {
uri := c.Request().URI()
pathVal := string(uri.Path())
relative := &url.URL{Path: pathVal, RawPath: pathVal}
if newPath, ok := applyDockerHubNamespaceFallback(route, relative.Path); ok {
relative.Path = newPath
relative.RawPath = newPath
clean := normalizeRequestPath(route, pathVal)
if newPath, ok := applyPyPISimpleFallback(route, clean); ok {
clean = newPath
}
if newPath, ok := applyDockerHubNamespaceFallback(route, clean); ok {
clean = newPath
}
relative := &url.URL{Path: clean, RawPath: clean}
if query := string(uri.QueryString()); query != "" {
relative.RawQuery = query
}
@@ -472,10 +495,10 @@ func determineCachePolicy(route *server.HubRoute, locator cache.Locator, method
policy := cachePolicy{allowCache: true, allowStore: true}
path := stripQueryMarker(locator.Path)
switch route.Config.Type {
case "docker":
if path == "/v2" || path == "v2" || path == "/v2/" {
return cachePolicy{}
}
case "docker":
if path == "/v2" || path == "v2" || path == "/v2/" {
return cachePolicy{}
}
if strings.Contains(path, "/_catalog") {
return cachePolicy{}
}
@@ -484,21 +507,27 @@ func determineCachePolicy(route *server.HubRoute, locator cache.Locator, method
}
policy.requireRevalidate = true
return policy
case "go":
if strings.Contains(path, "/@v/") && (strings.HasSuffix(path, ".zip") || strings.HasSuffix(path, ".mod") || strings.HasSuffix(path, ".info")) {
return policy
}
policy.requireRevalidate = true
return policy
case "npm":
if strings.Contains(path, "/-/") && strings.HasSuffix(path, ".tgz") {
return policy
}
policy.requireRevalidate = true
return policy
default:
case "go":
if strings.Contains(path, "/@v/") && (strings.HasSuffix(path, ".zip") || strings.HasSuffix(path, ".mod") || strings.HasSuffix(path, ".info")) {
return policy
}
policy.requireRevalidate = true
return policy
case "npm":
if strings.Contains(path, "/-/") && strings.HasSuffix(path, ".tgz") {
return policy
}
policy.requireRevalidate = true
return policy
case "pypi":
if isPyPIDistribution(path) {
return policy
}
policy.requireRevalidate = true
return policy
default:
return policy
}
}
func isDockerImmutablePath(path string) bool {
@@ -511,6 +540,25 @@ func isDockerImmutablePath(path string) bool {
return false
}
func isPyPIDistribution(path string) bool {
switch {
case strings.HasSuffix(path, ".whl"):
return true
case strings.HasSuffix(path, ".tar.gz"):
return true
case strings.HasSuffix(path, ".tar.bz2"):
return true
case strings.HasSuffix(path, ".tgz"):
return true
case strings.HasSuffix(path, ".zip"):
return true
case strings.HasSuffix(path, ".egg"):
return true
default:
return false
}
}
func isCacheableStatus(status int) bool {
return status == http.StatusOK
}
@@ -616,6 +664,23 @@ func splitDockerRepoPath(path string) (string, string, bool) {
return "", "", false
}
func applyPyPISimpleFallback(route *server.HubRoute, path string) (string, bool) {
if route == nil || route.Config.Type != "pypi" {
return path, false
}
if strings.HasPrefix(path, "/simple/") || strings.HasPrefix(path, "/packages/") {
return path, false
}
if strings.HasSuffix(path, ".whl") || strings.HasSuffix(path, ".tar.gz") || strings.HasSuffix(path, ".tar.bz2") || strings.HasSuffix(path, ".zip") {
return path, false
}
trimmed := strings.Trim(path, "/")
if trimmed == "" || strings.HasPrefix(trimmed, "_") {
return path, false
}
return "/simple/" + trimmed + "/", true
}
type bearerChallenge struct {
Realm string
Service string
@@ -766,6 +831,8 @@ func ensureProxyHubType(route *server.HubRoute) error {
return nil
case "go":
return nil
case "pypi":
return nil
default:
return fmt.Errorf("unsupported hub type: %s", route.Config.Type)
}

View File

@@ -144,6 +144,8 @@ func ensureRouterHubType(route *HubRoute) error {
return nil
case "go":
return nil
case "pypi":
return nil
default:
return fmt.Errorf("unsupported hub type: %s", route.Config.Type)
}

View File

@@ -0,0 +1,241 @@
package integration
import (
"context"
"io"
"net"
"net/http"
"net/http/httptest"
"sync"
"testing"
"time"
"github.com/gofiber/fiber/v3"
"github.com/sirupsen/logrus"
"github.com/any-hub/any-hub/internal/cache"
"github.com/any-hub/any-hub/internal/config"
"github.com/any-hub/any-hub/internal/proxy"
"github.com/any-hub/any-hub/internal/server"
)
func TestPyPICachePolicies(t *testing.T) {
stub := newPyPIStub(t)
defer stub.Close()
storageDir := t.TempDir()
cfg := &config.Config{
Global: config.GlobalConfig{
ListenPort: 5000,
CacheTTL: config.Duration(30 * time.Second),
StoragePath: storageDir,
},
Hubs: []config.HubConfig{
{
Name: "pypi",
Domain: "pypi.hub.local",
Type: "pypi",
Upstream: stub.URL,
},
},
}
registry, err := server.NewHubRegistry(cfg)
if err != nil {
t.Fatalf("registry error: %v", err)
}
logger := logrus.New()
logger.SetOutput(io.Discard)
store, err := cache.NewStore(storageDir)
if err != nil {
t.Fatalf("store error: %v", err)
}
handler := proxy.NewHandler(server.NewUpstreamClient(cfg), logger, store)
app, err := server.NewApp(server.AppOptions{
Logger: logger,
Registry: registry,
Proxy: handler,
ListenPort: 5000,
})
if err != nil {
t.Fatalf("app error: %v", err)
}
doRequest := func(path string) *http.Response {
req := httptest.NewRequest("GET", "http://pypi.hub.local"+path, nil)
req.Host = "pypi.hub.local"
resp, err := app.Test(req)
if err != nil {
t.Fatalf("app.Test error: %v", err)
}
return resp
}
simplePath := "/simple/pkg/"
resp := doRequest(simplePath)
if resp.StatusCode != fiber.StatusOK {
t.Fatalf("expected 200 for simple index, got %d", resp.StatusCode)
}
if resp.Header.Get("X-Any-Hub-Cache-Hit") != "false" {
t.Fatalf("expected miss for first simple request")
}
resp.Body.Close()
resp2 := doRequest(simplePath)
if resp2.Header.Get("X-Any-Hub-Cache-Hit") != "true" {
t.Fatalf("expected cached simple response after HEAD revalidation")
}
resp2.Body.Close()
if stub.simpleHeadHits != 1 {
t.Fatalf("expected single HEAD for simple index, got %d", stub.simpleHeadHits)
}
stub.UpdateSimple([]byte("<html>updated</html>"))
resp3 := doRequest(simplePath)
if resp3.Header.Get("X-Any-Hub-Cache-Hit") != "false" {
t.Fatalf("expected miss after simple index update")
}
resp3.Body.Close()
if stub.simpleHits != 2 {
t.Fatalf("expected second GET for updated index, got %d", stub.simpleHits)
}
if stub.simpleHeadHits != 2 {
t.Fatalf("expected second HEAD before refresh, got %d", stub.simpleHeadHits)
}
wheelPath := "/packages/foo/foo-1.0-py3-none-any.whl"
respWheel := doRequest(wheelPath)
if respWheel.StatusCode != fiber.StatusOK {
t.Fatalf("expected 200 for wheel, got %d", respWheel.StatusCode)
}
if respWheel.Header.Get("X-Any-Hub-Cache-Hit") != "false" {
t.Fatalf("expected miss for first wheel request")
}
respWheel.Body.Close()
respWheel2 := doRequest(wheelPath)
if respWheel2.Header.Get("X-Any-Hub-Cache-Hit") != "true" {
t.Fatalf("expected cached wheel response without revalidation")
}
respWheel2.Body.Close()
if stub.wheelHeadHits != 0 {
t.Fatalf("wheel path should not perform HEAD, got %d", stub.wheelHeadHits)
}
// bare project path should fallback to /simple/<name>/.
bareResp := doRequest("/pkg/")
if bareResp.StatusCode != fiber.StatusOK {
body, _ := io.ReadAll(bareResp.Body)
t.Fatalf("expected fallback success for bare path, got %d body=%s", bareResp.StatusCode, string(body))
}
bareResp.Body.Close()
}
type pypiStub struct {
server *http.Server
listener net.Listener
URL string
mu sync.Mutex
simpleHits int
simpleHeadHits int
wheelHits int
wheelHeadHits int
simpleBody []byte
wheelBody []byte
lastSimpleMod string
}
func newPyPIStub(t *testing.T) *pypiStub {
t.Helper()
stub := &pypiStub{
simpleBody: []byte("<html>ok</html>"),
wheelBody: []byte("wheel-bytes"),
lastSimpleMod: time.Now().UTC().Format(http.TimeFormat),
}
mux := http.NewServeMux()
mux.HandleFunc("/simple/pkg/", stub.handleSimple)
mux.HandleFunc("/packages/foo/foo-1.0-py3-none-any.whl", stub.handleWheel)
listener, err := net.Listen("tcp", "127.0.0.1:0")
if err != nil {
t.Skipf("unable to start pypi stub: %v", err)
}
server := &http.Server{Handler: mux}
stub.server = server
stub.listener = listener
stub.URL = "http://" + listener.Addr().String()
go func() {
_ = server.Serve(listener)
}()
return stub
}
func (s *pypiStub) handleSimple(w http.ResponseWriter, r *http.Request) {
s.mu.Lock()
if r.Method == http.MethodHead {
s.simpleHeadHits++
} else {
s.simpleHits++
}
body := append([]byte(nil), s.simpleBody...)
lastMod := s.lastSimpleMod
s.mu.Unlock()
w.Header().Set("Content-Type", "text/html")
w.Header().Set("Last-Modified", lastMod)
if r.Method == http.MethodHead {
w.WriteHeader(http.StatusOK)
return
}
w.WriteHeader(http.StatusOK)
_, _ = w.Write(body)
}
func (s *pypiStub) handleWheel(w http.ResponseWriter, r *http.Request) {
s.mu.Lock()
if r.Method == http.MethodHead {
s.wheelHeadHits++
} else {
s.wheelHits++
}
body := append([]byte(nil), s.wheelBody...)
s.mu.Unlock()
w.Header().Set("Content-Type", "application/octet-stream")
if r.Method == http.MethodHead {
w.WriteHeader(http.StatusOK)
return
}
w.WriteHeader(http.StatusOK)
_, _ = w.Write(body)
}
func (s *pypiStub) UpdateSimple(body []byte) {
s.mu.Lock()
defer s.mu.Unlock()
s.simpleBody = append([]byte(nil), body...)
s.lastSimpleMod = time.Now().UTC().Format(http.TimeFormat)
}
func (s *pypiStub) Close() {
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
if s.server != nil {
_ = s.server.Shutdown(ctx)
}
if s.listener != nil {
_ = s.listener.Close()
}
}