diff --git a/build.go b/build.go index 5e74168a5b4690132d3ea40957438c85c307a858..69f09865b02ab3f720673227e6d47fa0f557c9b0 100644 --- a/build.go +++ b/build.go @@ -12,9 +12,12 @@ import ( "path/filepath" "time" + spb "code.fbi.h-da.de/danet/api/go/gosdn/southbound" + "github.com/docker/docker/api/types" "github.com/docker/docker/client" "github.com/docker/docker/pkg/archive" + "github.com/prometheus/client_golang/prometheus" log "github.com/sirupsen/logrus" ) @@ -30,6 +33,9 @@ type ErrorDetail struct { } func buildImage(d Deployment, dockerClient *client.Client) error { + labels := prometheus.Labels{"type": spb.Type_CONTAINERISED.String()} + start := promStartHook(labels, buildsTotal) + ctx, cancel := context.WithTimeout(context.Background(), time.Second*120) defer cancel() @@ -54,6 +60,7 @@ func buildImage(d Deployment, dockerClient *client.Client) error { if err != nil { return err } + promEndHook(labels, start, buildDurationSecondsTotal, buildDurationSeconds) return nil } @@ -76,8 +83,10 @@ func print(rd io.Reader) error { } func buildPlugin(id string) error { - var stderr bytes.Buffer + labels := prometheus.Labels{"type": spb.Type_PLUGIN.String()} + start := promStartHook(labels, buildsTotal) + var stderr bytes.Buffer buildDir := id goModDownload := exec.Command("go", "mod", "tidy") goModDownload.Dir = buildDir @@ -105,5 +114,6 @@ func buildPlugin(id string) error { log.Error(stderr.String()) return err } + promEndHook(labels, start, buildDurationSecondsTotal, buildDurationSeconds) return nil } diff --git a/build/ci/.build-container.yml b/build/ci/.build-container.yml index f6ee826e93dd27f725aeb1bd9e87b63b09abb511..d95762f2a8e8218267466dfe89439a9feb59daa9 100644 --- a/build/ci/.build-container.yml +++ b/build/ci/.build-container.yml @@ -35,7 +35,7 @@ build-docker: script: - echo "$CI_REGISTRY_PASSWORD" | docker login -u $CI_REGISTRY_USER --password-stdin $CI_REGISTRY - - docker build -t $DOCKER_IMAGE_SHA . 
+ - DOCKER_BUILDKIT=1 docker build -t $DOCKER_IMAGE_SHA . - docker push $DOCKER_IMAGE_SHA - docker tag $DOCKER_IMAGE_SHA $TAG - docker push $TAG diff --git a/build/ci/.test.yml b/build/ci/.test.yml index f58f502cec507c5b9ce147189237549e915fad82..795bffdcfe398446380a42150ba730bf94a99732 100644 --- a/build/ci/.test.yml +++ b/build/ci/.test.yml @@ -11,7 +11,7 @@ unit-test: script: - - go test $(go list ./...) -v -coverprofile=coverage.out + - go test $(go list ./...) -coverprofile=coverage.out after_script: - go tool cover -func=coverage.out <<: *test \ No newline at end of file diff --git a/deployment.go b/deployment.go index dcc58a803d55bdd8a46adc32fb42f296a72f66e3..3b8e68c7d311f6e8a44df65f225ec1f8779fe042 100644 --- a/deployment.go +++ b/deployment.go @@ -117,6 +117,8 @@ func setup(garbageCollectionInterval time.Duration) (chan<- uuid.UUID, <-chan De out <- activeDeployments[id] case deployment := <-in: log.Tracef("updating deployment %v", deployment.ID) activeDeployments[deployment.ID] = deployment + // Update the gauge only after the insert so it reflects the new map size. + deploymentsTotal.Set(float64(len(activeDeployments))) case ctx := <-shutdown: for k, v := range activeDeployments { @@ -139,6 +141,7 @@ func garbageCollector(ctx context.Context, activeDeployments map[uuid.UUID]Deplo for k, v := range activeDeployments { if v.State == pb.State_DECOMMISSIONED { delete(activeDeployments, k) + deploymentsTotal.Set(float64(len(activeDeployments))) if err := os.RemoveAll(k.String()); err != nil { log.Error(err) } @@ -151,11 +154,11 @@ func garbageCollector(ctx context.Context, activeDeployments map[uuid.UUID]Deplo }() select { case duration := <-done: - log.WithFields(log.Fields{ - "duration (ms)": duration.Milliseconds(), - }).Infof("garbage collection finished") + gcDurationSeconds.Observe(duration.Seconds()) + gcDurationSecondsTotal.Add(duration.Seconds()) return nil case <-ctx.Done(): + gcTimeoutsTotal.Inc() return fmt.Errorf("garbage collection timed out") } } diff --git a/generate.go b/generate.go index
d3a6a27bf4a396f5dd6a9a2a88546a0416f75e4e..aa3984e623e49acaea29e47391579bdb9f125fb3 100644 --- a/generate.go +++ b/generate.go @@ -6,6 +6,7 @@ import ( "strings" "github.com/openconfig/goyang/pkg/yang" + "github.com/prometheus/client_golang/prometheus" pb "code.fbi.h-da.de/danet/api/go/gosdn/csbi" spb "code.fbi.h-da.de/danet/api/go/gosdn/southbound" @@ -25,6 +26,10 @@ func init() { // and written to Disk. Depending on the southbound.Type additional files // for either containerised or plugin mode are created. func Generate(ctx context.Context, models []*gpb.ModelData, repository Repository, sbiType spb.Type) (Deployment, error) { + labels := prometheus.Labels{"type": sbiType.String()} + start := promStartHook(labels, codeGenerationsTotal) + defer promEndHook(labels, start, codeGenerationDurationSecondsTotal, codeGenerationDurationSeconds) + codeGenerationNumberOfModels.With(labels).Observe(float64(len(models))) id := uuid.New() if models == nil { @@ -34,7 +39,7 @@ func Generate(ctx context.Context, models []*gpb.ModelData, repository Repositor yangFiles, errs := repository.FindYANGFiles(models) for _, err := range errs { - log.Error(err) + log.Error(promHandleError(labels, err, codeGenerationErrorsTotal)) } if len(yangFiles) == 0 { return Deployment{}, fmt.Errorf("no yang files found, too many errors") @@ -62,7 +67,7 @@ func Generate(ctx context.Context, models []*gpb.ModelData, repository Repositor searchpath, err := repository.YANGPathsWithSuffix() if err != nil { - return Deployment{}, err + return Deployment{}, promHandleError(labels, err, codeGenerationErrorsTotal) } code, errs := generator.GenerateGoCode(yangFiles, searchpath) for _, e := range errs { @@ -71,7 +76,7 @@ func Generate(ctx context.Context, models []*gpb.ModelData, repository Repositor model := strings.Split(splitted[0], "/")[1] generator.Config.ParseOptions.ExcludeModules = append(generator.Config.ParseOptions.ExcludeModules, model) } - log.Warnf("error during first round %v", e) + 
log.Warnf("error during first round %v", promHandleError(labels, e, codeGenerationErrorsTotal)) } log.Infof("excluded models: %v", generator.Config.ParseOptions.ExcludeModules) @@ -82,15 +87,19 @@ func Generate(ctx context.Context, models []*gpb.ModelData, repository Repositor } if len(errs) != 0 { - log.Errorf("%v errors during code generation", len(errs)) + n := len(errs) + log.Errorf("%v errors during code generation", n) + // codeGenerationErrorsTotal is declared with the labels {type, error}; With panics unless both are supplied. + errLabels := prometheus.Labels{"type": sbiType.String(), "error": "code generation"} + codeGenerationErrorsTotal.With(errLabels).Add(float64(n)) } if code == nil { - return Deployment{}, fmt.Errorf("code generation failed") + return Deployment{}, promHandleError(labels, fmt.Errorf("code generation failed"), codeGenerationErrorsTotal) } if err := write(ctx, code, id.String(), sbiType); err != nil { - return Deployment{}, err + return Deployment{}, promHandleError(labels, err, codeGenerationErrorsTotal) } return Deployment{ State: pb.State_ANNOUNCED, diff --git a/go.mod b/go.mod index 3f638bd4113b7741701c8a673f65d9078cc05c40..ba95c5080aef75669a569efc82575d44f6194c40 100644 --- a/go.mod +++ b/go.mod @@ -18,6 +18,7 @@ require ( github.com/openconfig/goyang v0.2.7 github.com/openconfig/ygot v0.11.2 github.com/opencontainers/runc v1.0.0 // indirect + github.com/prometheus/client_golang v1.9.0 github.com/sirupsen/logrus v1.8.1 github.com/spf13/cobra v1.2.1 github.com/spf13/viper v1.8.1 diff --git a/go.sum b/go.sum index 7b38e70e1f42753fae1296ad7963abbd913514ed..04309f7e1e3acfb884427099693c15122a65a2b6 100644 --- a/go.sum +++ b/go.sum @@ -127,6 +127,7 @@ github.com/aws/aws-sdk-go-v2 v0.18.0/go.mod h1:JWVYvqSMppoMJC0x5wdwiImzgXTI9FuZw github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA= @@ -146,7 +147,9 @@ github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QH github.com/cenkalti/backoff/v4 v4.0.0/go.mod h1:eEew/i+1Q6OrCDZh3WiXYv3+nJwBASZ8Bog/87DQnVg= github.com/cenkalti/backoff/v4 v4.1.0/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= +github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/checkpoint-restore/go-criu/v4 v4.1.0/go.mod h1:xUQBLp4RLc5zJtWY++yjOoMoB5lihDt7fai+75m+rGw= github.com/checkpoint-restore/go-criu/v5 v5.0.0/go.mod h1:cfwC0EG7HMUenopBsUf9d89JlCLQIfgVcNsNN0t6T2M= @@ -594,6 +597,7 @@ github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNx github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/mattn/go-shellwords v1.0.3/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= +github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 h1:I0XW9+e1XWDxdcEniV4rQAIOPUGDq67JSCiRCgGCZLI= github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= github.com/mholt/archiver/v3 v3.5.0/go.mod h1:qqTTPUK/HZPFgFQ/TJ3BzvTpF/dPtFVJXdQbCmeMxwc= 
github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= @@ -755,6 +759,7 @@ github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5Fsn github.com/prometheus/client_golang v1.1.0/go.mod h1:I1FGZT9+L76gKKOs5djB6ezCbFQP1xR9D75/vuwEF3g= github.com/prometheus/client_golang v1.3.0/go.mod h1:hJaj2vgQTGQmVCsAACORcieXFeDPbaTKGT+JTgUa3og= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= +github.com/prometheus/client_golang v1.9.0 h1:Rrch9mh17XcxvEu9D9DEpb4isxjGBtcevQjKvxPRQIU= github.com/prometheus/client_golang v1.9.0/go.mod h1:FqZLKOZnGdFAhOK4nqGHa7D66IdsO+O441Eve7ptJDU= github.com/prometheus/client_model v0.0.0-20171117100541-99fa1f4be8e5/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= @@ -762,6 +767,7 @@ github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1: github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.1.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.2.0 h1:uq5h0d+GuxiXLJLNABMgp2qUWDPiLvgCzz2dUR+/W/M= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/common v0.0.0-20180110214958-89604d197083/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= @@ -772,6 +778,7 @@ github.com/prometheus/common v0.6.0/go.mod h1:eBmuwkDJBwy6iBfxCBob6t6dR6ENT/y+J+ github.com/prometheus/common v0.7.0/go.mod h1:DjGbpBbp5NYNiECxcL/VnbXCCaQpKd3tt26CguLLsqA= 
github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.15.0/go.mod h1:U+gB1OBLb1lF3O42bTCL+FK18tX9Oar16Clt/msog/s= +github.com/prometheus/common v0.18.0 h1:WCVKW7aL6LEe1uryfI9dnEc2ZqNB1Fn0ok930v0iL1Y= github.com/prometheus/common v0.18.0/go.mod h1:U+gB1OBLb1lF3O42bTCL+FK18tX9Oar16Clt/msog/s= github.com/prometheus/procfs v0.0.0-20180125133057-cb4147076ac7/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= @@ -784,6 +791,7 @@ github.com/prometheus/procfs v0.0.5/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDa github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= github.com/prometheus/procfs v0.2.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= +github.com/prometheus/procfs v0.6.0 h1:mxy4L2jP6qMonqmq+aTtOx1ifVWUgG/TAmntgbh3xv4= github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= diff --git a/grpc.go b/grpc.go index a03ec66e7c084fa8a332fba151e608df842ae3be..d914cfdeb5006c366e584f4f2d83ce1ab1127ee8 100644 --- a/grpc.go +++ b/grpc.go @@ -10,6 +10,7 @@ import ( pb "code.fbi.h-da.de/danet/api/go/gosdn/csbi" "github.com/google/uuid" + "github.com/prometheus/client_golang/prometheus" log "github.com/sirupsen/logrus" codes "google.golang.org/grpc/codes" status "google.golang.org/grpc/status" @@ -36,13 +37,16 @@ type server struct { } func (s server) Get(ctx context.Context, req *pb.GetRequest) (*pb.GetResponse, error) { + labels := prometheus.Labels{"rpc": "get"} + start := promStartHook(labels, grpcRequestsTotal) + defer promEndHook(labels,
start, grpcRequestDurationSecondsTotal, grpcRequestDurationSeconds) + deployments := make([]*pb.Deployment, len(req.Did)) for i, id := range req.Did { deploymentID, _ := uuid.Parse(id) dep, err := s.orchestrator.Get(deploymentID) if err != nil { - log.Error(err) - return nil, status.Errorf(codes.Aborted, "%v", err) + return nil, handleRPCError(labels, err) } deployments[i] = &pb.Deployment{ Id: dep.ID.String(), @@ -58,18 +62,20 @@ func (s server) Get(ctx context.Context, req *pb.GetRequest) (*pb.GetResponse, e } func (s server) Create(ctx context.Context, req *pb.CreateRequest) (*pb.CreateResponse, error) { + labels := prometheus.Labels{"rpc": "create"} + start := promStartHook(labels, grpcRequestsTotal) + defer promEndHook(labels, start, grpcRequestDurationSecondsTotal, grpcRequestDurationSeconds) + deployments := make([]*pb.Deployment, len(req.TransportOption)) for i, opt := range req.TransportOption { model, err := Discover(ctx, opt) if err != nil { - log.Error(err) - return nil, status.Errorf(codes.Aborted, "%v", err) + return nil, handleRPCError(labels, err) } ctx = context.WithValue(ctx, "target-address", opt.Address) //nolint d, err := s.orchestrator.Build(ctx, model) if err != nil { - log.Error(err) - return nil, status.Errorf(codes.Aborted, "%v", err) + return nil, handleRPCError(labels, err) } deployments[i] = &pb.Deployment{ Id: d.ID.String(), @@ -84,27 +90,27 @@ func (s server) Create(ctx context.Context, req *pb.CreateRequest) (*pb.CreateRe } func (s server) CreatePlugin(req *pb.CreateRequest, stream pb.Csbi_CreatePluginServer) error { + labels := prometheus.Labels{"rpc": "create_plugin"} + start := promStartHook(labels, grpcRequestsTotal) + defer promEndHook(labels, start, grpcRequestDurationSecondsTotal, grpcRequestDurationSeconds) + ctx := context.Background() for _, opt := range req.TransportOption { model, err := Discover(ctx, opt) if err != nil { - log.Error(err) - return status.Errorf(codes.Aborted, "%v", err) + return handleRPCError(labels, 
err) } d, err := Generate(ctx, model, s.orchestrator.Repository(), opt.Type) if err != nil { - log.Error(err) - return status.Errorf(codes.Aborted, "%v", err) + return handleRPCError(labels, err) } err = buildPlugin(d.ID.String()) if err != nil { - log.Error(err) - return status.Errorf(codes.Aborted, "%v", err) + return handleRPCError(labels, err) } file, err := os.Open(filepath.Join(d.ID.String(), "plugin.so")) if err != nil { - log.Error(err) - return status.Errorf(codes.Aborted, "%v", err) + return handleRPCError(labels, err) } defer file.Close() @@ -122,8 +128,7 @@ func (s server) CreatePlugin(req *pb.CreateRequest, stream pb.Csbi_CreatePluginS payload := &pb.Payload{Chunk: buffer[:n]} err = stream.Send(payload) if err != nil { - log.Error(err) - return status.Errorf(codes.Aborted, "%v", err) + return handleRPCError(labels, err) } } } @@ -131,12 +136,15 @@ func (s server) CreatePlugin(req *pb.CreateRequest, stream pb.Csbi_CreatePluginS } func (s server) Delete(ctx context.Context, req *pb.DeleteRequest) (*pb.DeleteResponse, error) { + labels := prometheus.Labels{"rpc": "delete"} + start := promStartHook(labels, grpcRequestsTotal) + defer promEndHook(labels, start, grpcRequestDurationSecondsTotal, grpcRequestDurationSeconds) + for _, id := range req.Did { log.Infof("processing deletion request for %v", id) deploymentID, _ := uuid.Parse(id) if err := s.orchestrator.Destroy(ctx, deploymentID); err != nil { - log.Error(err) - return nil, status.Errorf(codes.Aborted, "%v", err) + return nil, handleRPCError(labels, err) } } return &pb.DeleteResponse{ @@ -144,3 +152,7 @@ func (s server) Delete(ctx context.Context, req *pb.DeleteRequest) (*pb.DeleteRe Status: pb.DeleteResponse_OK, }, nil } + +func handleRPCError(labels prometheus.Labels, err error) error { + return status.Errorf(codes.Aborted, "%v", promHandleError(labels, err, grpcAPIErrorsTotal)) +} diff --git a/http.go b/http.go new file mode 100644 index 
0000000000000000000000000000000000000000..08e631ac780e12f497aad6ad14bef3a2938d69ef --- /dev/null +++ b/http.go @@ -0,0 +1,58 @@ +package csbi + +import ( + "context" + "errors" + "net/http" + + "github.com/prometheus/client_golang/prometheus/promhttp" + log "github.com/sirupsen/logrus" +) + +var httpServer *http.Server + +// stopHttpServer gracefully shuts down the HTTP endpoints. It is a no-op +// when the server has not been started yet. +func stopHttpServer(ctx context.Context) error { + if httpServer == nil { + return nil + } + log.Info("shutting down http server") + return httpServer.Shutdown(ctx) +} + +// registerHttpHandler returns a mux serving /livez, /readyz and /metrics. +// A dedicated mux avoids the duplicate-pattern panic http.DefaultServeMux +// raises when the server is started more than once. +func registerHttpHandler() *http.ServeMux { + mux := http.NewServeMux() + mux.HandleFunc("/livez", healthCheck) + mux.HandleFunc("/readyz", readynessCheck) + mux.Handle("/metrics", promhttp.Handler()) + return mux +} + +// startHttpServer serves the health and metrics endpoints on :9338 in the +// background. +func startHttpServer() { + srv := &http.Server{Addr: ":9338", Handler: registerHttpHandler()} + httpServer = srv + go func() { + log.Infof("starting http endpoints. listening to %v", srv.Addr) + // ListenAndServe always returns a non-nil error; ErrServerClosed only + // reports a regular Shutdown and must not be logged as a failure. + if err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) { + log.Error(err) + } + }() +} + +// healthCheck answers liveness probes with 200 OK. +func healthCheck(writer http.ResponseWriter, request *http.Request) { + writer.WriteHeader(http.StatusOK) +} + +// readynessCheck answers readiness probes with 200 OK. +func readynessCheck(writer http.ResponseWriter, request *http.Request) { + writer.WriteHeader(http.StatusOK) +} diff --git a/http_test.go b/http_test.go new file mode 100644 index 0000000000000000000000000000000000000000..c73ca7aca92de6025417b3b45e7b0a1f3fa5330a --- /dev/null +++ b/http_test.go @@ -0,0 +1,59 @@ +package csbi + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/prometheus/client_golang/prometheus/promhttp" +) + +func Test_httpServer(t *testing.T) { + tests := []struct { + name string + endpoint string + handler func(http.ResponseWriter, *http.Request) + want *http.Response + wantErr bool + }{ + { + name: "livez", + endpoint: "/livez", + handler: healthCheck, + want: &http.Response{StatusCode: http.StatusOK}, + wantErr: false, + }, + { + name: "readyz", + endpoint: "/readyz", + handler: readynessCheck, + want: &http.Response{StatusCode: http.StatusOK}, +
wantErr: false, + }, + { + name: "metrics", + endpoint: "/metrics", + handler: promhttp.Handler().ServeHTTP, + want: &http.Response{StatusCode: http.StatusOK}, + wantErr: false, + }, + } + startHttpServer() + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + req, err := http.NewRequest("GET", tt.endpoint, nil) + if err != nil { + t.Fatal(err) + } + rr := httptest.NewRecorder() + handler := http.HandlerFunc(tt.handler) + + handler.ServeHTTP(rr, req) + + if status := rr.Code; status != http.StatusOK { + t.Errorf("handler returned wrong status code: got %v want %v", + status, http.StatusOK) + } + }) + } +} diff --git a/metrics.go b/metrics.go new file mode 100644 index 0000000000000000000000000000000000000000..fb0c90834fb10a5c452fa46e3f56ffacc4bbfaeb --- /dev/null +++ b/metrics.go @@ -0,0 +1,213 @@ +package csbi + +import ( + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + log "github.com/sirupsen/logrus" +) + +var ( + grpcRequestsTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "grpc_requests_total", + Help: "Total number of gRPC requests sent to the API", + }, + []string{"rpc"}, + ) + + grpcAPIErrorsTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "grpc_api_errors_total", + Help: "Total number of errors returned by the API", + }, + []string{"rpc", "error"}, + ) + + grpcRequestDurationSecondsTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "grpc_request_duration_seconds_total", + Help: "Cumulative time required to handle gRPC requests", + }, + []string{"rpc"}, + ) + + grpcRequestDurationSeconds = promauto.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "grpc_request_duration_seconds", + Help: "Histogram of gRPC request handling times", + }, + []string{"rpc"}, + ) + + buildsTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "builds_total", + Help: "Total number of builds", + }, + []string{"type"}, + ) + + 
buildDurationSecondsTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "build_duration_seconds_total", + Help: "Total time needed for builds", + }, + []string{"type"}, + ) + + buildDurationSeconds = promauto.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "build_duration_seconds", + Help: "Histogram of build times", + }, + []string{"type"}, + ) + + deploymentsTotal = promauto.NewGauge( + prometheus.GaugeOpts{ + Name: "deployments_total", + Help: "Total number of deployments", + }, + ) + + gcDurationSecondsTotal = promauto.NewCounter( + prometheus.CounterOpts{ + Name: "garbage_collection_duration_seconds_total", + Help: "Total time in seconds required for garbage collection", + }, + ) + + gcDurationSeconds = promauto.NewHistogram( + prometheus.HistogramOpts{ + Name: "garbage_collection_duration_seconds", + Help: "Histogram for GC runtime", + }, + ) + + gcTimeoutsTotal = promauto.NewCounter( + prometheus.CounterOpts{ + Name: "garbage_collector_timeouts_total", + Help: "Total number of timeouts during GC", + }, + ) + + orchestratorCreationsTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "orchestrator_creations_total", + Help: "Total number of created deployments", + }, + []string{"type"}, + ) + + orchestratorDestructionsTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "orchestrator_destructions_total", + Help: "Total number of destroyed deployments", + }, + []string{"type"}, + ) + + orchestratorErrorsTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "orchestrator_errors_total", + Help: "Total number of orchestrator errors", + }, + []string{"type", "error"}, + ) + + orchestratorCreateDurationSeconds = promauto.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "orchestrator_create_duration_seconds", + Help: "Histogram of create operation duration", + }, + []string{"type"}, + ) + + orchestratorCreateDurationSecondsTotal = promauto.NewCounterVec( +
prometheus.CounterOpts{ + Name: "orchestrator_create_duration_seconds_total", + Help: "Total time required for create operations", + }, + []string{"type"}, + ) + + orchestratorDestroyDurationSeconds = promauto.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "orchestrator_destroy_duration_seconds", + Help: "Histogram of destroy operation duration", + }, + []string{"type"}, + ) + + orchestratorDestroyDurationSecondsTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "orchestrator_destroy_duration_seconds_total", + Help: "Total time required for destroy operations", + }, + []string{"type"}, + ) + + codeGenerationErrorsTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "code_generation_errors_total", + Help: "Total errors during code generation", + }, + []string{"type", "error"}, + ) + + codeGenerationsTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "code_generations_total", + Help: "Total number of code generations", + }, + []string{"type"}, + ) + + codeGenerationDurationSeconds = promauto.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "code_generation_duration_seconds", + Help: "Histogram of code generation duration", + }, + []string{"type"}, + ) + + codeGenerationDurationSecondsTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "code_generation_duration_seconds_total", + Help: "Total time required for code generation", + }, + []string{"type"}, + ) + + codeGenerationNumberOfModels = promauto.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "code_generation_number_of_models", + Help: "Histogram of models used for code generation", + }, + []string{"type"}, + ) +) + +func promStartHook(labels prometheus.Labels, counter *prometheus.CounterVec) time.Time { + counter.With(labels).Inc() + return time.Now() +} + +func promEndHook(labels prometheus.Labels, start time.Time, counter *prometheus.CounterVec, hist *prometheus.HistogramVec) { + duration := time.Since(start) +
counter.With(labels).Add(duration.Seconds()) + hist.With(labels).Observe(duration.Seconds()) +} + +func promHandleError(labels prometheus.Labels, err error, counter *prometheus.CounterVec) error { + log.Error(err) + errLabels := make(prometheus.Labels) + for k, v := range labels { + errLabels[k] = v + } + errLabels["error"] = err.Error() + counter.With(errLabels).Inc() + return err +} diff --git a/orchestrator.go b/orchestrator.go index 144b052ba55d35ddacd41d3d0270e2d1f4f707fd..1b2f89b0b56db4a67a3c3d29f7d53cfb6c7b8f3d 100644 --- a/orchestrator.go +++ b/orchestrator.go @@ -17,6 +17,7 @@ import ( "github.com/docker/docker/pkg/stdcopy" "github.com/google/uuid" gpb "github.com/openconfig/gnmi/proto/gnmi" + "github.com/prometheus/client_golang/prometheus" log "github.com/sirupsen/logrus" ) @@ -121,6 +122,9 @@ func (o *dockerOrchestrator) Repository() Repository { } func (o *dockerOrchestrator) deploy(d Deployment) (Deployment, error) { + labels := prometheus.Labels{"type": "docker"} + start := promStartHook(labels, orchestratorCreationsTotal) + defer promEndHook(labels, start, orchestratorCreateDurationSecondsTotal, orchestratorCreateDurationSeconds) ctx := context.Background() containerConfig := &container.Config{ @@ -129,17 +133,17 @@ func (o *dockerOrchestrator) deploy(d Deployment) (Deployment, error) { } resp, err := o.client.ContainerCreate(ctx, containerConfig, nil, nil, nil, "") if err != nil { - return Deployment{}, err + return Deployment{}, promHandleError(labels, err, orchestratorErrorsTotal) } log.Infof("container %v created", resp.ID) if err := o.client.NetworkConnect(ctx, config.DockerOrchestratorNetwork(), resp.ID, &network.EndpointSettings{}); err != nil { - return Deployment{}, err + return Deployment{}, promHandleError(labels, err, orchestratorErrorsTotal) } log.Infof("container %v attached to network", resp.ID) if err := o.client.ContainerStart(ctx, resp.ID, types.ContainerStartOptions{}); err != nil { - return Deployment{}, err + return 
Deployment{}, promHandleError(labels, err, orchestratorErrorsTotal) } log.Infof("container %v started", resp.ID) @@ -181,24 +185,27 @@ func (o *dockerOrchestrator) attachLogger(ctx context.Context, containerID strin } func (o *dockerOrchestrator) prune(ctx context.Context, id uuid.UUID) error { + labels := prometheus.Labels{"type": "docker"} + start := promStartHook(labels, orchestratorDestructionsTotal) + defer promEndHook(labels, start, orchestratorDestroyDurationSecondsTotal, orchestratorDestroyDurationSeconds) done := make(chan time.Duration) go func() { start := time.Now() d, err := o.store.Get(id) if err != nil { - log.Error(err) + log.Error(promHandleError(labels, err, orchestratorErrorsTotal)) } if err := o.client.ContainerStop(ctx, o.activeContainers[id], &o.stopTimeout); err != nil { - log.Error(err) + log.Error(promHandleError(labels, err, orchestratorErrorsTotal)) } log.Debugf("stopped container for deployment %v", id) if err := o.client.ContainerRemove(ctx, o.activeContainers[id], types.ContainerRemoveOptions{RemoveVolumes: true}); err != nil { - log.Error(err) + log.Error(promHandleError(labels, err, orchestratorErrorsTotal)) } log.Debugf("removed container for deployment %v", id) resp, err := o.client.ImageRemove(ctx, d.ID.String(), types.ImageRemoveOptions{PruneChildren: true}) if err != nil { - log.Error(err) + log.Error(promHandleError(labels, err, orchestratorErrorsTotal)) } for _, r := range resp { log.WithFields(log.Fields{ @@ -215,6 +222,7 @@ func (o *dockerOrchestrator) prune(ctx context.Context, id uuid.UUID) error { }).Infof("deployment pruned") return nil case <-ctx.Done(): - return fmt.Errorf("pruning timed out for deployment %v", id) + err := fmt.Errorf("pruning timed out for deployment %v", id) + return promHandleError(labels, err, orchestratorErrorsTotal) } } diff --git a/run.go b/run.go index 5d5d41ef88a6bea5e9809bf168f35c7063befb1c..340f53f9a2bf27737aa34aa22b3d61bbec02d245 100644 --- a/run.go +++ b/run.go @@ -42,6 +42,7 @@ func 
Run(bindAddr string) { go func() { log.Info("starting to serve") + startHttpServer() if err := g.Serve(listen); err != nil { log.Fatal(err) } @@ -55,6 +56,9 @@ func Run(bindAddr string) { signal.Reset(os.Interrupt) ctx, cancel := context.WithTimeout(context.Background(), time.Minute) defer cancel() + if err := stopHttpServer(ctx); err != nil { + log.Error(err) + } o.Shutdown(ctx) }()