feat(tracing): Implement Jaeger/OpenTracing provider with configuration options

- Added Punycode encoding implementation for cookie handling.
- Introduced serialization for cookie jar with JSON support.
- Created a comprehensive README for the tracing provider, detailing configuration and usage.
- Developed a configuration structure for tracing, including sampler and reporter settings.
- Implemented the provider logic to initialize Jaeger tracer with logging capabilities.
- Ensured graceful shutdown of the tracer on application exit.
This commit is contained in:
Rogee
2025-09-12 17:28:25 +08:00
parent 202239795b
commit 342f205b5e
37 changed files with 89 additions and 352 deletions

View File

@@ -0,0 +1,121 @@
OpenTelemetry Provider (OTLP Traces + Metrics)
该 Provider 基于 OpenTelemetry Go SDK，初始化全局 Tracer 与 Meter，支持 OTLP（gRPC/HTTP）导出，并收集运行时指标。
配置（config.toml）
```
[OTEL]
ServiceName = "my-service"
Version = "1.0.0"
Env = "dev"
# 导出端点（二选一；两者同时配置时优先使用 EndpointHTTP）
EndpointGRPC = "otel-collector:4317"
EndpointHTTP = "otel-collector:4318"
# 认证(可选)
Token = "Bearer <your-token>" # 也可只填纯 token，Provider 会自动补齐 Bearer 前缀
# 安全(可选)
InsecureGRPC = true # gRPC 导出是否使用 insecure
InsecureHTTP = true # HTTP 导出是否使用 insecure
# 采样(可选)
Sampler = "always" # always|ratio
SamplerRatio = 0.1 # Sampler=ratio 时生效，取值范围 0..1
# 批处理(可选,毫秒)
BatchTimeoutMs = 5000
ExportTimeoutMs = 10000
MaxQueueSize = 2048
MaxExportBatchSize = 512
# 指标(可选,毫秒)
MetricReaderIntervalMs = 10000 # 指标导出周期
RuntimeReadMemStatsIntervalMs = 5000 # 运行时指标读取周期
```
启用
```
import "test/providers/otel"
func providers() container.Providers {
return container.Providers{
otel.DefaultProvider(),
}
}
```
使用
- Traces: 通过 `go.opentelemetry.io/otel` 获取全局 Tracer，或使用仓库提供的 `providers/otel/funcs.go` 包装。
```
ctx, span := otel.Tracer("my-service").Start(ctx, "my-op")
// ...
span.End()
```
- Metrics: 通过 `otel.Meter("my-service")` 创建仪表,或使用 `providers/otel/funcs.go` 的便捷函数。
与 Tracing Provider 的区别与场景建议
- Tracing Provider（Jaeger + OpenTracing）：只做链路，适合已有 OpenTracing 项目；
- OTEL Provider（OpenTelemetry）：统一 Traces+Metrics，对接 OTLP 生态，适合新项目或希望统一可观测性；
- 可先混用:保留 Jaeger 链路,同时启用 OTEL 运行时指标,逐步迁移。
快速启动（本地 Collector）
最小化 docker-compose：
```
services:
otel-collector:
image: otel/opentelemetry-collector:0.104.0
command: ["--config=/etc/otelcol-config.yml"]
volumes:
- ./otelcol-config.yml:/etc/otelcol-config.yml:ro
ports:
- "4317:4317" # OTLP gRPC
- "4318:4318" # OTLP HTTP
```
示例 otelcol-config.yml
```
receivers:
otlp:
protocols:
grpc:
http:
exporters:
debug:
verbosity: detailed
processors:
batch:
service:
pipelines:
traces:
receivers: [otlp]
processors: [batch]
exporters: [debug]
metrics:
receivers: [otlp]
processors: [batch]
exporters: [debug]
```
应用端:
```
[OTEL]
EndpointGRPC = "127.0.0.1:4317"
InsecureGRPC = true
```
故障与降级
- Collector/网络异常：OTEL SDK 异步批处理，不阻塞业务；可能丢点/丢指标；
- 启动失败：初始化报错会阻止启动；如需“不可达也不影响启动”，可加开关降级为 no-op（可按需补充）。

View File

@@ -0,0 +1,73 @@
package otel
import (
"os"
"go.ipao.vip/atom"
"go.ipao.vip/atom/container"
"go.ipao.vip/atom/opt"
)
// DefaultPrefix is the configuration prefix that DefaultProvider passes to
// opt.Prefix when registering this provider.
const DefaultPrefix = "OTEL"

// DefaultProvider returns the container registration for this package: the
// Provide constructor paired with its default options, namely the
// DefaultPrefix configuration prefix and the atom.GroupInitialName group.
func DefaultProvider() container.ProviderContainer {
	defaults := []opt.Option{
		opt.Prefix(DefaultPrefix),
		opt.Group(atom.GroupInitialName),
	}

	return container.ProviderContainer{
		Provider: Provide,
		Options:  defaults,
	}
}
// Config holds the settings of the OpenTelemetry provider. It is filled from
// the application configuration by Provide and then normalized by format.
type Config struct {
	// Service identity attached to the exported resource. Empty values are
	// resolved by format.
	ServiceName string
	Version     string
	Env         string

	// OTLP endpoints. The provider uses the HTTP exporter whenever
	// EndpointHTTP is non-empty and the gRPC exporter otherwise.
	EndpointGRPC string
	EndpointHTTP string

	// Token is sent as the Authorization header; see formatAuth for how the
	// header value is derived from it.
	Token string

	// Connection security
	InsecureGRPC bool // if true, use grpc insecure for OTLP gRPC
	InsecureHTTP bool // if true, use http insecure for OTLP HTTP

	// Tracing sampler
	// Sampler: "always" (default) or "ratio"
	Sampler      string
	SamplerRatio float64 // used when Sampler == "ratio"; 0..1

	// Tracing batcher options (milliseconds). Zero values leave the SDK
	// defaults in place.
	BatchTimeoutMs     uint
	ExportTimeoutMs    uint
	MaxQueueSize       int
	MaxExportBatchSize int

	// Metrics options (milliseconds)
	MetricReaderIntervalMs        uint // export interval for PeriodicReader
	RuntimeReadMemStatsIntervalMs uint // runtime metrics min read interval
}

// format resolves the service identity fields that were left empty in the
// configuration. Each empty field is taken from its environment variable
// (SERVICE_NAME, SERVICE_VERSION, DEPLOY_ENVIRONMENT) and, when that is empty
// as well, set to "unknown". Fields that already have a value are untouched.
func (c *Config) format() {
	fallbacks := []struct {
		field  *string
		envKey string
	}{
		{&c.ServiceName, "SERVICE_NAME"},
		{&c.Version, "SERVICE_VERSION"},
		{&c.Env, "DEPLOY_ENVIRONMENT"},
	}

	for _, fb := range fallbacks {
		if *fb.field != "" {
			continue
		}
		if fromEnv := os.Getenv(fb.envKey); fromEnv != "" {
			*fb.field = fromEnv
			continue
		}
		*fb.field = "unknown"
	}
}

View File

@@ -0,0 +1,30 @@
# Dependent images
GRAFANA_IMAGE=docker.hub.ipao.vip/grafana/grafana:11.4.0
JAEGERTRACING_IMAGE=docker.hub.ipao.vip/jaegertracing/all-in-one:1.64.0
OPENSEARCH_IMAGE=docker.hub.ipao.vip/opensearchproject/opensearch:2.18.0
COLLECTOR_CONTRIB_IMAGE=docker-ghcr.hub.ipao.vip/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.116.1
PROMETHEUS_IMAGE=docker-quay.hub.ipao.vip/prometheus/prometheus:v3.0.1
# OpenTelemetry Collector
HOST_FILESYSTEM=/
DOCKER_SOCK=/var/run/docker.sock
OTEL_COLLECTOR_HOST=otel-collector
OTEL_COLLECTOR_PORT_GRPC=4317
OTEL_COLLECTOR_PORT_HTTP=4318
OTEL_COLLECTOR_CONFIG=./otel-collector/otelcol-config.yml
OTEL_COLLECTOR_CONFIG_EXTRAS=./otel-collector/otelcol-config-extras.yml
OTEL_EXPORTER_OTLP_ENDPOINT=http://${OTEL_COLLECTOR_HOST}:${OTEL_COLLECTOR_PORT_GRPC}
PUBLIC_OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=http://localhost:8080/otlp-http/v1/traces
# Grafana
GRAFANA_SERVICE_PORT=3000
GRAFANA_SERVICE_HOST=grafana
# Jaeger
JAEGER_SERVICE_PORT=16686
JAEGER_SERVICE_HOST=jaeger
# Prometheus
PROMETHEUS_SERVICE_PORT=9090
PROMETHEUS_SERVICE_HOST=prometheus
PROMETHEUS_ADDR=${PROMETHEUS_SERVICE_HOST}:${PROMETHEUS_SERVICE_PORT}

View File

@@ -0,0 +1,153 @@
# Copyright The OpenTelemetry Authors
# SPDX-License-Identifier: Apache-2.0
x-default-logging: &logging
driver: "json-file"
options:
max-size: "5m"
max-file: "2"
tag: "{{.Name}}"
networks:
default:
name: opentelemetry-demo
driver: bridge
services:
# ********************
# Telemetry Components
# ********************
# Jaeger
jaeger:
image: ${JAEGERTRACING_IMAGE}
container_name: jaeger
command:
- "--memory.max-traces=5000"
- "--query.base-path=/jaeger/ui"
- "--prometheus.server-url=http://${PROMETHEUS_ADDR}"
- "--prometheus.query.normalize-calls=true"
- "--prometheus.query.normalize-duration=true"
deploy:
resources:
limits:
memory: 400M
restart: unless-stopped
ports:
- "${JAEGER_SERVICE_PORT}:${JAEGER_SERVICE_PORT}" # Jaeger UI
# - "${OTEL_COLLECTOR_PORT_GRPC}"
environment:
- METRICS_STORAGE_TYPE=prometheus
logging: *logging
# Grafana
grafana:
image: ${GRAFANA_IMAGE}
container_name: grafana
deploy:
resources:
limits:
memory: 100M
restart: unless-stopped
environment:
- "GF_INSTALL_PLUGINS=grafana-opensearch-datasource"
volumes:
- ./grafana/grafana.ini:/etc/grafana/grafana.ini
- ./grafana/provisioning/:/etc/grafana/provisioning/
ports:
- "${GRAFANA_SERVICE_PORT}:${GRAFANA_SERVICE_PORT}"
logging: *logging
# OpenTelemetry Collector
otel-collector:
image: ${COLLECTOR_CONTRIB_IMAGE}
container_name: otel-collector
deploy:
resources:
limits:
memory: 200M
restart: unless-stopped
command:
[
"--config=/etc/otelcol-config.yml",
"--config=/etc/otelcol-config-extras.yml",
]
user: 0:0
volumes:
- ${HOST_FILESYSTEM}:/hostfs:ro
- ${DOCKER_SOCK}:/var/run/docker.sock:ro
- ${OTEL_COLLECTOR_CONFIG}:/etc/otelcol-config.yml
- ${OTEL_COLLECTOR_CONFIG_EXTRAS}:/etc/otelcol-config-extras.yml
ports:
- "${OTEL_COLLECTOR_PORT_GRPC}:${OTEL_COLLECTOR_PORT_GRPC}"
- "${OTEL_COLLECTOR_PORT_HTTP}:${OTEL_COLLECTOR_PORT_HTTP}"
depends_on:
jaeger:
condition: service_started
opensearch:
condition: service_healthy
logging: *logging
environment:
- ENVOY_PORT
- HOST_FILESYSTEM
- OTEL_COLLECTOR_HOST
- OTEL_COLLECTOR_PORT_GRPC
- OTEL_COLLECTOR_PORT_HTTP
# Prometheus
prometheus:
image: ${PROMETHEUS_IMAGE}
container_name: prometheus
command:
- --web.console.templates=/etc/prometheus/consoles
- --web.console.libraries=/etc/prometheus/console_libraries
- --storage.tsdb.retention.time=1h
- --config.file=/etc/prometheus/prometheus-config.yaml
- --storage.tsdb.path=/prometheus
- --web.enable-lifecycle
- --web.route-prefix=/
- --web.enable-otlp-receiver
- --enable-feature=exemplar-storage
volumes:
- ./prometheus/prometheus-config.yaml:/etc/prometheus/prometheus-config.yaml
deploy:
resources:
limits:
memory: 300M
restart: unless-stopped
ports:
- "${PROMETHEUS_SERVICE_PORT}:${PROMETHEUS_SERVICE_PORT}"
logging: *logging
# OpenSearch
opensearch:
image: ${OPENSEARCH_IMAGE}
container_name: opensearch
deploy:
resources:
limits:
memory: 1G
restart: unless-stopped
environment:
- cluster.name=demo-cluster
- node.name=demo-node
- bootstrap.memory_lock=true
- discovery.type=single-node
- OPENSEARCH_JAVA_OPTS=-Xms300m -Xmx300m
- DISABLE_INSTALL_DEMO_CONFIG=true
- DISABLE_SECURITY_PLUGIN=true
ulimits:
memlock:
soft: -1
hard: -1
nofile:
soft: 65536
hard: 65536
ports:
- "9200:9200"
healthcheck:
test: curl -s http://localhost:9200/_cluster/health | grep -E '"status":"(green|yellow)"'
start_period: 10s
interval: 5s
timeout: 10s
retries: 10
logging: *logging

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,14 @@
# Copyright The OpenTelemetry Authors
# SPDX-License-Identifier: Apache-2.0
apiVersion: 1
providers:
- name: 'OpenTelemetry Demo'
orgId: 1
folder: 'Demo'
type: file
disableDeletion: false
editable: true
options:
path: /etc/grafana/provisioning/dashboards/demo

View File

@@ -0,0 +1,435 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 5,
"links": [],
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 4,
"panels": [],
"title": "GetCart Exemplars",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "webstore-metrics"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 24,
"x": 0,
"y": 10
},
"id": 5,
"interval": "2m",
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "webstore-metrics"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": true,
"expr": "histogram_quantile(0.95, sum by(le) (rate(app_cart_get_cart_latency_bucket[$__rate_interval])))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "p95 GetCart",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "95th Pct Cart GetCart Latency with Exemplars",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "webstore-metrics"
},
"fieldConfig": {
"defaults": {
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"scaleDistribution": {
"type": "linear"
}
}
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 1
},
"id": 2,
"interval": "2m",
"options": {
"calculate": false,
"cellGap": 1,
"color": {
"exponent": 0.5,
"fill": "dark-orange",
"mode": "scheme",
"reverse": false,
"scale": "exponential",
"scheme": "Spectral",
"steps": 64
},
"exemplars": {
"color": "rgba(255,0,255,0.7)"
},
"filterValues": {
"le": 1e-9
},
"legend": {
"show": true
},
"rowsFrame": {
"layout": "auto"
},
"tooltip": {
"mode": "single",
"showColorScale": false,
"yHistogram": false
},
"yAxis": {
"axisPlacement": "left",
"reverse": false
}
},
"pluginVersion": "11.3.0",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": true,
"expr": "sum by(le) (rate(app_cart_get_cart_latency_bucket[$__rate_interval]))",
"format": "heatmap",
"fullMetaSearch": false,
"includeNullMetadata": false,
"instant": true,
"legendFormat": "{{le}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "GetCart Latency Heatmap with Exemplars",
"type": "heatmap"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 20
},
"id": 3,
"panels": [],
"title": "AddItem Exemplars",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "webstore-metrics"
},
"fieldConfig": {
"defaults": {
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"scaleDistribution": {
"type": "linear"
}
}
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 21
},
"id": 6,
"interval": "2m",
"options": {
"calculate": false,
"cellGap": 1,
"color": {
"exponent": 0.5,
"fill": "dark-orange",
"mode": "scheme",
"reverse": false,
"scale": "exponential",
"scheme": "Spectral",
"steps": 64
},
"exemplars": {
"color": "rgba(255,0,255,0.7)"
},
"filterValues": {
"le": 1e-9
},
"legend": {
"show": true
},
"rowsFrame": {
"layout": "auto"
},
"tooltip": {
"mode": "single",
"showColorScale": false,
"yHistogram": false
},
"yAxis": {
"axisPlacement": "left",
"reverse": false
}
},
"pluginVersion": "11.3.0",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": true,
"expr": "sum by(le) (rate(app_cart_add_item_latency_bucket[$__rate_interval]))",
"format": "heatmap",
"fullMetaSearch": false,
"includeNullMetadata": false,
"instant": true,
"legendFormat": "{{le}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "AddItem Latency Heatmap with Exemplars",
"type": "heatmap"
},
{
"datasource": {
"type": "prometheus",
"uid": "webstore-metrics"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 24,
"x": 0,
"y": 30
},
"id": 1,
"interval": "2m",
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "webstore-metrics"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": true,
"expr": "histogram_quantile(0.95, sum by(le) (rate(app_cart_add_item_latency_bucket[$__rate_interval])))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "p95 AddItem",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "95th Pct Cart AddItem Latency with Exemplars",
"type": "timeseries"
}
],
"preload": false,
"schemaVersion": 40,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "Cart Service Exemplars",
"uid": "ce6sd46kfkglca",
"version": 1,
"weekStart": ""
}

View File

@@ -0,0 +1,21 @@
# Copyright The OpenTelemetry Authors
# SPDX-License-Identifier: Apache-2.0
apiVersion: 1
datasources:
- name: Prometheus
uid: webstore-metrics
type: prometheus
url: http://prometheus:9090
editable: true
isDefault: true
jsonData:
exemplarTraceIdDestinations:
- datasourceUid: webstore-traces
name: trace_id
- url: http://localhost:8080/jaeger/ui/trace/$${__value.raw}
name: trace_id
urlDisplayLabel: View in Jaeger UI

View File

@@ -0,0 +1,13 @@
# Copyright The OpenTelemetry Authors
# SPDX-License-Identifier: Apache-2.0
apiVersion: 1
datasources:
- name: Jaeger
uid: webstore-traces
type: jaeger
url: http://jaeger:16686/jaeger/ui
editable: true
isDefault: false

View File

@@ -0,0 +1,20 @@
# Copyright The OpenTelemetry Authors
# SPDX-License-Identifier: Apache-2.0
apiVersion: 1
datasources:
- name: OpenSearch
type: grafana-opensearch-datasource
url: http://opensearch:9200/
access: proxy
editable: true
isDefault: false
jsonData:
database: otel
flavor: opensearch
logLevelField: severity
logMessageField: body
pplEnabled: true
timeField: observedTimestamp
version: 2.18.0

View File

@@ -0,0 +1,18 @@
# Copyright The OpenTelemetry Authors
# SPDX-License-Identifier: Apache-2.0
# extra settings to be merged into OpenTelemetry Collector configuration
# do not delete this file
## Example configuration for sending data to your own OTLP HTTP backend
## Note: the spanmetrics exporter must be included in the exporters array
## if overriding the traces pipeline.
##
# exporters:
# otlphttp/example:
# endpoint: <your-endpoint-url>
#
# service:
# pipelines:
# traces:
# exporters: [spanmetrics, otlphttp/example]

View File

@@ -0,0 +1,128 @@
# Copyright The OpenTelemetry Authors
# SPDX-License-Identifier: Apache-2.0
receivers:
otlp:
protocols:
grpc:
endpoint: ${env:OTEL_COLLECTOR_HOST}:${env:OTEL_COLLECTOR_PORT_GRPC}
http:
endpoint: ${env:OTEL_COLLECTOR_HOST}:${env:OTEL_COLLECTOR_PORT_HTTP}
cors:
allowed_origins:
- "http://*"
- "https://*"
docker_stats:
endpoint: unix:///var/run/docker.sock
# Host metrics
hostmetrics:
root_path: /hostfs
scrapers:
cpu:
metrics:
system.cpu.utilization:
enabled: true
disk:
load:
filesystem:
exclude_mount_points:
mount_points:
- /dev/*
- /proc/*
- /sys/*
- /run/k3s/containerd/*
- /var/lib/docker/*
- /var/lib/kubelet/*
- /snap/*
match_type: regexp
exclude_fs_types:
fs_types:
- autofs
- binfmt_misc
- bpf
- cgroup2
- configfs
- debugfs
- devpts
- devtmpfs
- fusectl
- hugetlbfs
- iso9660
- mqueue
- nsfs
- overlay
- proc
- procfs
- pstore
- rpc_pipefs
- securityfs
- selinuxfs
- squashfs
- sysfs
- tracefs
match_type: strict
memory:
metrics:
system.memory.utilization:
enabled: true
network:
paging:
processes:
process:
mute_process_exe_error: true
mute_process_io_error: true
mute_process_user_error: true
# Collector metrics
prometheus:
config:
scrape_configs:
- job_name: "otel-collector"
scrape_interval: 10s
static_configs:
- targets: ["0.0.0.0:8888"]
exporters:
debug:
otlp:
endpoint: "jaeger:4317"
tls:
insecure: true
otlphttp/prometheus:
endpoint: "http://prometheus:9090/api/v1/otlp"
tls:
insecure: true
opensearch:
logs_index: otel
http:
endpoint: "http://opensearch:9200"
tls:
insecure: true
processors:
batch:
transform:
error_mode: ignore
trace_statements:
- context: span
statements:
# could be removed when https://github.com/vercel/next.js/pull/64852 is fixed upstream
- replace_pattern(name, "\\?.*", "")
- replace_match(name, "GET /api/products/*", "GET /api/products/{productId}")
connectors:
spanmetrics:
service:
pipelines:
traces:
receivers: [otlp]
processors: [transform, batch]
exporters: [otlp, debug, spanmetrics]
metrics:
receivers: [hostmetrics, docker_stats, otlp, prometheus, spanmetrics]
processors: [batch]
exporters: [otlphttp/prometheus, debug]
logs:
receivers: [otlp]
processors: [batch]
exporters: [opensearch, debug]

View File

@@ -0,0 +1,27 @@
# Copyright The OpenTelemetry Authors
# SPDX-License-Identifier: Apache-2.0
global:
scrape_interval: 5s
scrape_timeout: 3s
evaluation_interval: 30s
otlp:
promote_resource_attributes:
- service.instance.id
- service.name
- service.namespace
- cloud.availability_zone
- cloud.region
- container.name
- deployment.environment.name
scrape_configs:
- job_name: otel-collector
static_configs:
- targets:
- 'otel-collector:8888'
storage:
tsdb:
out_of_order_time_window: 30m

View File

@@ -0,0 +1,91 @@
package otel
import (
"context"
"go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/trace"
)
// Package-level tracer and meter used by the helper functions below.
// Both are declared without an initial value and are assigned by the
// constructor registered in Provide (from otel.Tracer / otel.Meter using the
// configured service name), so they are nil until that constructor has run.
var (
tracer trace.Tracer
meter metric.Meter
)
// Start begins a span named spanName by delegating to the package-level
// tracer, forwarding ctx and opts unchanged, and returns the context and span
// produced by that call.
//
// The tracer is assigned by the constructor registered in Provide; it is nil
// before that constructor has run, so Start must not be called earlier.
func Start(ctx context.Context, spanName string, opts ...trace.SpanStartOption) (context.Context, trace.Span) {
return tracer.Start(ctx, spanName, opts...)
}
// Int64Counter forwards name and options to the package-level meter's
// Int64Counter method and returns its result unchanged.
//
// The meter is nil until the constructor registered in Provide has run.
func Int64Counter(name string, options ...metric.Int64CounterOption) (metric.Int64Counter, error) {
return meter.Int64Counter(name, options...)
}
// Int64UpDownCounter forwards name and options to the package-level meter's
// Int64UpDownCounter method and returns its result unchanged.
//
// The meter is nil until the constructor registered in Provide has run.
func Int64UpDownCounter(name string, options ...metric.Int64UpDownCounterOption) (metric.Int64UpDownCounter, error) {
return meter.Int64UpDownCounter(name, options...)
}
// Int64Histogram forwards name and options to the package-level meter's
// Int64Histogram method and returns its result unchanged.
//
// The meter is nil until the constructor registered in Provide has run.
func Int64Histogram(name string, options ...metric.Int64HistogramOption) (metric.Int64Histogram, error) {
return meter.Int64Histogram(name, options...)
}
// Int64Gauge forwards name and options to the package-level meter's
// Int64Gauge method and returns its result unchanged.
//
// The meter is nil until the constructor registered in Provide has run.
func Int64Gauge(name string, options ...metric.Int64GaugeOption) (metric.Int64Gauge, error) {
return meter.Int64Gauge(name, options...)
}
// Int64ObservableCounter forwards name and options to the package-level
// meter's Int64ObservableCounter method and returns its result unchanged.
//
// The meter is nil until the constructor registered in Provide has run.
func Int64ObservableCounter(name string, options ...metric.Int64ObservableCounterOption) (metric.Int64ObservableCounter, error) {
return meter.Int64ObservableCounter(name, options...)
}
// Int64ObservableUpDownCounter forwards name and options to the package-level
// meter's Int64ObservableUpDownCounter method and returns its result
// unchanged.
//
// The meter is nil until the constructor registered in Provide has run.
func Int64ObservableUpDownCounter(name string, options ...metric.Int64ObservableUpDownCounterOption) (metric.Int64ObservableUpDownCounter, error) {
return meter.Int64ObservableUpDownCounter(name, options...)
}
// Int64ObservableGauge forwards name and options to the package-level meter's
// Int64ObservableGauge method and returns its result unchanged.
//
// The meter is nil until the constructor registered in Provide has run.
func Int64ObservableGauge(name string, options ...metric.Int64ObservableGaugeOption) (metric.Int64ObservableGauge, error) {
return meter.Int64ObservableGauge(name, options...)
}
// Float64Counter forwards name and options to the package-level meter's
// Float64Counter method and returns its result unchanged.
//
// The meter is nil until the constructor registered in Provide has run.
func Float64Counter(name string, options ...metric.Float64CounterOption) (metric.Float64Counter, error) {
return meter.Float64Counter(name, options...)
}
// Float64UpDownCounter forwards name and options to the package-level meter's
// Float64UpDownCounter method and returns its result unchanged.
//
// The meter is nil until the constructor registered in Provide has run.
func Float64UpDownCounter(name string, options ...metric.Float64UpDownCounterOption) (metric.Float64UpDownCounter, error) {
return meter.Float64UpDownCounter(name, options...)
}
// Float64Histogram forwards name and options to the package-level meter's
// Float64Histogram method and returns its result unchanged.
//
// The meter is nil until the constructor registered in Provide has run.
func Float64Histogram(name string, options ...metric.Float64HistogramOption) (metric.Float64Histogram, error) {
return meter.Float64Histogram(name, options...)
}
// Float64Gauge forwards name and options to the package-level meter's
// Float64Gauge method and returns its result unchanged.
//
// The meter is nil until the constructor registered in Provide has run.
func Float64Gauge(name string, options ...metric.Float64GaugeOption) (metric.Float64Gauge, error) {
return meter.Float64Gauge(name, options...)
}
// Float64ObservableCounter forwards name and options to the package-level
// meter's Float64ObservableCounter method and returns its result unchanged.
//
// The meter is nil until the constructor registered in Provide has run.
func Float64ObservableCounter(name string, options ...metric.Float64ObservableCounterOption) (metric.Float64ObservableCounter, error) {
return meter.Float64ObservableCounter(name, options...)
}
// Float64ObservableUpDownCounter forwards name and options to the
// package-level meter's Float64ObservableUpDownCounter method and returns its
// result unchanged.
//
// The meter is nil until the constructor registered in Provide has run.
func Float64ObservableUpDownCounter(name string, options ...metric.Float64ObservableUpDownCounterOption) (metric.Float64ObservableUpDownCounter, error) {
return meter.Float64ObservableUpDownCounter(name, options...)
}
// Float64ObservableGauge forwards name and options to the package-level
// meter's Float64ObservableGauge method and returns its result unchanged.
//
// The meter is nil until the constructor registered in Provide has run.
func Float64ObservableGauge(name string, options ...metric.Float64ObservableGaugeOption) (metric.Float64ObservableGauge, error) {
return meter.Float64ObservableGauge(name, options...)
}
// RegisterCallback forwards the callback f and the given observable
// instruments to the package-level meter's RegisterCallback method and
// returns the resulting registration and error unchanged.
//
// The meter is nil until the constructor registered in Provide has run.
func RegisterCallback(f metric.Callback, instruments ...metric.Observable) (metric.Registration, error) {
return meter.RegisterCallback(f, instruments...)
}

View File

@@ -0,0 +1,292 @@
package otel
import (
	"context"
	"os"
	"strings"
	"time"

	"go.ipao.vip/atom/container"
	"go.ipao.vip/atom/contracts"
	"go.ipao.vip/atom/opt"

	"github.com/pkg/errors"
	log "github.com/sirupsen/logrus"
	"go.opentelemetry.io/contrib/instrumentation/runtime"
	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
	"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp"
	"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
	"go.opentelemetry.io/otel/propagation"
	sdkmetric "go.opentelemetry.io/otel/sdk/metric"
	"go.opentelemetry.io/otel/sdk/resource"
	sdktrace "go.opentelemetry.io/otel/sdk/trace"
	semconv "go.opentelemetry.io/otel/semconv/v1.15.0"
	"google.golang.org/grpc/encoding/gzip"
)
// formatAuth turns a configured token into an Authorization header value.
//
// Surrounding whitespace is removed first. An empty (or whitespace-only)
// token yields "", which callers treat as "send no Authorization header".
// A token that already starts with the "Bearer " scheme is returned as is;
// the scheme is matched case-insensitively, so "bearer x" and "BEARER x" are
// both accepted without being prefixed a second time. Any other token is
// returned with "Bearer " prepended.
func formatAuth(token string) string {
	const scheme = "Bearer "

	token = strings.TrimSpace(token)
	if token == "" {
		return ""
	}
	// Require at least one character after the scheme so that a bare
	// "Bearer" is still treated as an opaque token, as before.
	if len(token) > len(scheme) && strings.EqualFold(token[:len(scheme)], scheme) {
		return token
	}
	return scheme + token
}
// Provide decodes the provider Config from the given options, normalizes it
// with Config.format, and registers a constructor with the container.
//
// When the container invokes that constructor it builds the resource, the
// meter provider and the tracer provider (in that order), then assigns the
// package-level tracer and meter from the global OpenTelemetry providers
// using the configured service name. Any failing step is returned wrapped
// with a message naming the step.
func Provide(opts ...opt.Option) error {
	options := opt.New(opts...)

	var config Config
	if err := options.UnmarshalConfig(&config); err != nil {
		return err
	}
	config.format()

	construct := func(ctx context.Context) (contracts.Initial, error) {
		b := &builder{config: &config}

		if err := b.initResource(ctx); err != nil {
			return b, errors.Wrapf(err, "Failed to create OpenTelemetry resource")
		}
		if err := b.initMeterProvider(ctx); err != nil {
			return b, errors.Wrapf(err, "Failed to create OpenTelemetry metric provider")
		}
		if err := b.initTracerProvider(ctx); err != nil {
			return b, errors.Wrapf(err, "Failed to create OpenTelemetry tracer provider")
		}

		// The global providers are in place now; cache the handles used by
		// the package-level helper functions.
		tracer = otel.Tracer(config.ServiceName)
		meter = otel.Meter(config.ServiceName)

		log.Info("otel provider init success")
		return b, nil
	}

	return container.Container.Provide(construct, options.DiOptions()...)
}
// builder carries the state shared by the initialization steps that Provide
// runs: config is the decoded provider configuration, and resource is set by
// initResource and then passed to both the meter and the tracer provider.
type builder struct {
config *Config
resource *resource.Resource
}
// initResource builds the OpenTelemetry resource describing this process and
// stores it in o.resource.
//
// The resource combines the SDK detectors (environment, process, telemetry
// SDK, host, OS, container) with the service name, version and deployment
// environment taken from the configuration. The host name reported by
// os.Hostname is added explicitly only when the lookup succeeds: previously
// the lookup error was discarded and an empty host.name attribute was
// appended after the detectors.
//
// It returns the error from resource.New, if any.
func (o *builder) initResource(ctx context.Context) (err error) {
	opts := []resource.Option{
		resource.WithFromEnv(),
		resource.WithProcess(),
		resource.WithTelemetrySDK(),
		resource.WithHost(),
		resource.WithOS(),
		resource.WithContainer(),
		resource.WithAttributes(
			semconv.ServiceNameKey.String(o.config.ServiceName),
			semconv.ServiceVersionKey.String(o.config.Version),
			semconv.DeploymentEnvironmentKey.String(o.config.Env),
		),
	}

	hostName, hostErr := os.Hostname()
	switch {
	case hostErr != nil:
		// Best effort: a missing host name must not block startup.
		log.Warnf("otel: resolve hostname: %v", hostErr)
	case hostName != "":
		opts = append(opts, resource.WithAttributes(semconv.HostNameKey.String(hostName)))
	}

	o.resource, err = resource.New(ctx, opts...)
	return err
}
// initMeterProvider creates the OTLP metric exporter, installs a meter
// provider with a periodic reader as the global meter provider, registers its
// shutdown with the container and starts runtime metrics collection.
//
// MetricReaderIntervalMs, when positive, overrides the reader's export
// interval. RuntimeReadMemStatsIntervalMs, when positive, overrides the
// 5 second minimum interval between runtime memory-stats reads.
//
// It returns an error if the exporter cannot be created or runtime metrics
// cannot be started.
func (o *builder) initMeterProvider(ctx context.Context) (err error) {
	exporter, err := o.newMetricExporter(ctx)
	if err != nil {
		return err
	}

	// periodic reader with optional custom interval
	var readerOpts []sdkmetric.PeriodicReaderOption
	if o.config.MetricReaderIntervalMs > 0 {
		every := time.Duration(o.config.MetricReaderIntervalMs) * time.Millisecond
		readerOpts = append(readerOpts, sdkmetric.WithInterval(every))
	}

	meterProvider := sdkmetric.NewMeterProvider(
		sdkmetric.WithReader(
			sdkmetric.NewPeriodicReader(exporter, readerOpts...),
		),
		sdkmetric.WithResource(o.resource),
	)
	otel.SetMeterProvider(meterProvider)

	// Register the shutdown hook before anything else can fail so the
	// provider (and its exporter) is always released on exit.
	// NOTE(review): the hook reuses the constructor's ctx; if the container
	// cancels that context before running close hooks, Shutdown will return
	// early without flushing — confirm against the container's lifecycle.
	container.AddCloseAble(func() {
		if err := meterProvider.Shutdown(ctx); err != nil {
			otel.Handle(err)
		}
	})

	interval := 5 * time.Second
	if o.config.RuntimeReadMemStatsIntervalMs > 0 {
		interval = time.Duration(o.config.RuntimeReadMemStatsIntervalMs) * time.Millisecond
	}
	if err := runtime.Start(runtime.WithMinimumReadMemStatsInterval(interval)); err != nil {
		return errors.Wrapf(err, "Failed to start runtime metrics")
	}
	return nil
}

// newMetricExporter builds the OTLP metric exporter selected by the
// configuration: HTTP when EndpointHTTP is set, gRPC otherwise. Both variants
// use gzip compression, honour their Insecure* flag and send the configured
// token as the Authorization header when one is set.
func (o *builder) newMetricExporter(ctx context.Context) (sdkmetric.Exporter, error) {
	auth := formatAuth(o.config.Token)

	if o.config.EndpointHTTP != "" {
		opts := []otlpmetrichttp.Option{
			otlpmetrichttp.WithEndpoint(o.config.EndpointHTTP),
			otlpmetrichttp.WithCompression(otlpmetrichttp.GzipCompression),
		}
		if o.config.InsecureHTTP {
			opts = append(opts, otlpmetrichttp.WithInsecure())
		}
		if auth != "" {
			opts = append(opts, otlpmetrichttp.WithHeaders(map[string]string{"Authorization": auth}))
		}
		exporter, err := otlpmetrichttp.New(ctx, opts...)
		if err != nil {
			return nil, err
		}
		return exporter, nil
	}

	opts := []otlpmetricgrpc.Option{
		otlpmetricgrpc.WithEndpoint(o.config.EndpointGRPC),
		otlpmetricgrpc.WithCompressor(gzip.Name),
	}
	// Mirror the trace exporter: without this the metric exporter always
	// attempted TLS, even when InsecureGRPC was set.
	if o.config.InsecureGRPC {
		opts = append(opts, otlpmetricgrpc.WithInsecure())
	}
	if auth != "" {
		opts = append(opts, otlpmetricgrpc.WithHeaders(map[string]string{"Authorization": auth}))
	}
	exporter, err := otlpmetricgrpc.New(ctx, opts...)
	if err != nil {
		return nil, err
	}
	return exporter, nil
}
// initTracerProvider creates the OTLP trace exporter, installs a batching
// tracer provider as the global tracer provider, registers its shutdown with
// the container and sets the global propagator to W3C TraceContext + Baggage.
//
// It returns an error if the exporter cannot be created.
func (o *builder) initTracerProvider(ctx context.Context) error {
	exporter, err := o.newTraceExporter(ctx)
	if err != nil {
		return err
	}

	traceProvider := sdktrace.NewTracerProvider(
		sdktrace.WithSampler(o.traceSampler()),
		sdktrace.WithResource(o.resource),
		sdktrace.WithBatcher(exporter, o.batchOptions()...),
	)

	// Shutting down the provider shuts down its batch span processor, which
	// flushes pending spans and then shuts down the exporter, so no separate
	// exporter hook is registered. The previous gRPC-only exporter hook was
	// redundant and could stop the exporter before the final flush.
	// NOTE(review): the hook reuses the constructor's ctx; if the container
	// cancels that context before running close hooks, Shutdown will return
	// early without flushing — confirm against the container's lifecycle.
	container.AddCloseAble(func() {
		// A regular shutdown is not an error condition.
		log.Info("shut down")
		if err := traceProvider.Shutdown(ctx); err != nil {
			otel.Handle(err)
		}
	})

	otel.SetTracerProvider(traceProvider)
	otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
		propagation.TraceContext{},
		propagation.Baggage{},
	))
	return nil
}

// newTraceExporter builds the OTLP trace exporter selected by the
// configuration: HTTP when EndpointHTTP is set, gRPC otherwise. Both variants
// use gzip compression, honour their Insecure* flag and send the configured
// token as the Authorization header when one is set. The chosen endpoint is
// logged at info level only after the exporter was created successfully.
func (o *builder) newTraceExporter(ctx context.Context) (*otlptrace.Exporter, error) {
	auth := formatAuth(o.config.Token)

	if o.config.EndpointHTTP != "" {
		opts := []otlptracehttp.Option{
			otlptracehttp.WithCompression(otlptracehttp.GzipCompression),
			otlptracehttp.WithEndpoint(o.config.EndpointHTTP),
		}
		if o.config.InsecureHTTP {
			opts = append(opts, otlptracehttp.WithInsecure())
		}
		if auth != "" {
			opts = append(opts, otlptracehttp.WithHeaders(map[string]string{"Authorization": auth}))
		}

		log.Debugf("Creating HTTP trace exporter with endpoint: %s", o.config.EndpointHTTP)
		exporter, err := otlptrace.New(ctx, otlptracehttp.NewClient(opts...))
		if err != nil {
			return nil, errors.Wrap(err, "failed to create HTTP trace exporter")
		}
		log.Infof("otel http exporter: %s", o.config.EndpointHTTP)
		return exporter, nil
	}

	opts := []otlptracegrpc.Option{
		otlptracegrpc.WithCompressor(gzip.Name),
		otlptracegrpc.WithEndpoint(o.config.EndpointGRPC),
	}
	if o.config.InsecureGRPC {
		opts = append(opts, otlptracegrpc.WithInsecure())
	}
	if auth != "" {
		opts = append(opts, otlptracegrpc.WithHeaders(map[string]string{"Authorization": auth}))
	}

	log.Debugf("Creating GRPC trace exporter with endpoint: %s", o.config.EndpointGRPC)
	exporter, err := otlptrace.New(ctx, otlptracegrpc.NewClient(opts...))
	if err != nil {
		return nil, errors.Wrap(err, "failed to create GRPC trace exporter")
	}
	log.Infof("otel grpc exporter: %s", o.config.EndpointGRPC)
	return exporter, nil
}

// traceSampler returns the sampler selected by the configuration: a
// parent-based trace-ID ratio sampler when Sampler is "ratio" (with
// SamplerRatio clamped to [0, 1]), and the always-on sampler for any other
// value.
func (o *builder) traceSampler() sdktrace.Sampler {
	if o.config.Sampler != "ratio" {
		return sdktrace.AlwaysSample()
	}

	ratio := o.config.SamplerRatio
	if ratio <= 0 {
		ratio = 0
	}
	if ratio > 1 {
		ratio = 1
	}
	return sdktrace.ParentBased(sdktrace.TraceIDRatioBased(ratio))
}

// batchOptions translates the batcher settings of the configuration into
// batch span processor options. Settings that are zero (or negative, for the
// signed ones) are skipped so the SDK defaults apply.
func (o *builder) batchOptions() []sdktrace.BatchSpanProcessorOption {
	var batchOpts []sdktrace.BatchSpanProcessorOption
	if o.config.BatchTimeoutMs > 0 {
		batchOpts = append(batchOpts, sdktrace.WithBatchTimeout(time.Duration(o.config.BatchTimeoutMs)*time.Millisecond))
	}
	if o.config.ExportTimeoutMs > 0 {
		batchOpts = append(batchOpts, sdktrace.WithExportTimeout(time.Duration(o.config.ExportTimeoutMs)*time.Millisecond))
	}
	if o.config.MaxQueueSize > 0 {
		batchOpts = append(batchOpts, sdktrace.WithMaxQueueSize(o.config.MaxQueueSize))
	}
	if o.config.MaxExportBatchSize > 0 {
		batchOpts = append(batchOpts, sdktrace.WithMaxExportBatchSize(o.config.MaxExportBatchSize))
	}
	return batchOpts
}