From dd916308348872581e94811a49899f11dbce6e45 Mon Sep 17 00:00:00 2001
From: Malte Bauch <malte.bauch@stud.h-da.de>
Date: Mon, 7 Mar 2022 17:02:30 +0100
Subject: [PATCH] A panic within `createCsbiDevice` does not result in a crash

- Removed Goroutine in `createCsbiDevice` and calls of `panic()` have been
  changed to `return err`
- `createCsbiDevice` is now called as Goroutine from within the
  `handleCsbiEnrolment` method.
- Error handling is done by using a `ErrorGroup`

These changes prevent that panics are thrown from a Goroutine that was
originally called from `createCsbiDevice`. Now normal errors are thrown
and catched with the help of a `ErrorGroup`.

Fixes: #171
---
 go.mod                            |  4 +-
 go.sum                            |  1 +
 nucleus/principalNetworkDomain.go | 90 ++++++++++++++++---------------
 3 files changed, 51 insertions(+), 44 deletions(-)

diff --git a/go.mod b/go.mod
index 955149660..e26fa1d1d 100644
--- a/go.mod
+++ b/go.mod
@@ -10,6 +10,7 @@ require (
 	github.com/docker/docker v20.10.11+incompatible
 	github.com/google/go-cmp v0.5.6
 	github.com/google/uuid v1.2.0
+	github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.2
 	github.com/openconfig/gnmi v0.0.0-20210914185457-51254b657b7d
 	github.com/openconfig/goyang v0.3.1
 	github.com/openconfig/ygot v0.12.5
@@ -19,12 +20,11 @@ require (
 	github.com/spf13/viper v1.9.0
 	github.com/stretchr/objx v0.2.0 // indirect
 	github.com/stretchr/testify v1.7.0
+	golang.org/x/sync v0.0.0-20210220032951-036812b2e83c
 	google.golang.org/grpc v1.43.0
 	google.golang.org/protobuf v1.27.1
 )
 
-require github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.2
-
 require (
 	github.com/beorn7/perks v1.0.1 // indirect
 	github.com/cespare/xxhash/v2 v2.1.1 // indirect
diff --git a/go.sum b/go.sum
index b411756bc..9321fe892 100644
--- a/go.sum
+++ b/go.sum
@@ -774,6 +774,7 @@ golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJ
 golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cOontH8FOep7tGV86Y7SQ=
 golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
diff --git a/nucleus/principalNetworkDomain.go b/nucleus/principalNetworkDomain.go
index 3492e1c70..a87fdea3e 100644
--- a/nucleus/principalNetworkDomain.go
+++ b/nucleus/principalNetworkDomain.go
@@ -10,6 +10,7 @@ import (
 
 	"code.fbi.h-da.de/danet/gosdn/metrics"
 	"code.fbi.h-da.de/danet/gosdn/nucleus/types"
+	"golang.org/x/sync/errgroup"
 
 	cpb "code.fbi.h-da.de/danet/api/go/gosdn/csbi"
 	ppb "code.fbi.h-da.de/danet/api/go/gosdn/pnd"
@@ -401,6 +402,7 @@ func (pnd *pndImplementation) handleCsbiDeletion(id uuid.UUID) error {
 }
 
 func (pnd *pndImplementation) handleCsbiEnrolment(name string, opt *tpb.TransportOption) error {
+	g := new(errgroup.Group)
 	ctx, cancel := context.WithTimeout(context.Background(), time.Minute*5)
 	defer cancel()
 	req := &cpb.CreateRequest{
@@ -412,61 +414,65 @@ func (pnd *pndImplementation) handleCsbiEnrolment(name string, opt *tpb.Transpor
 		return err
 	}
 	for _, d := range resp.Deployments {
-		if err := pnd.createCsbiDevice(ctx, name, d, opt); err != nil {
-			log.Error(err)
-		}
+		dCopy := d
+		g.Go(func() error {
+			return pnd.createCsbiDevice(ctx, name, dCopy, opt)
+		})
 	}
+	err = g.Wait()
+	if err != nil {
+		return err
+	}
+
 	return nil
 }
 
+// createCsbiDevice is a helper method for cSBI device creation. The method
+// waits for a SYN (which indicates that the cSBI is running and addressable)
+// of the commissioned cSBI and creates the device within the controller.
 func (pnd *pndImplementation) createCsbiDevice(ctx context.Context, name string, d *cpb.Deployment, opt *tpb.TransportOption) error {
-	defer func() {
-		if r := recover(); r != nil {
-			log.Errorf("recovered in sbi enrolment: %v", r)
-		}
-	}()
 	id, err := uuid.Parse(d.Id)
 	if err != nil {
 		return err
 	}
 	ch := make(chan store.DeviceDetails, 1)
 	pnd.callback(id, ch)
+	defer pnd.callback(id, nil)
+	defer close(ch)
 	tickatus := time.NewTicker(time.Minute * 1)
-	go func() {
-		select {
-		case <-tickatus.C:
-			log.WithFields(log.Fields{
-				"id":  d.Id,
-				"err": ctx.Err(),
-			}).Error("csbi handshake timed out")
-		case deviceDetails := <-ch:
-			log.Infof("syn from csbi %v", deviceDetails.ID)
-			id, err := uuid.Parse(deviceDetails.ID)
-			if err != nil {
-				panic(err)
-			}
-			csbiTransportOptions := &tpb.TransportOption{
-				Address:         deviceDetails.Address,
-				Username:        opt.Username,
-				Password:        opt.Password,
-				Tls:             opt.Tls,
-				Type:            opt.Type,
-				TransportOption: opt.TransportOption,
-			}
-			log.WithField("transport option", csbiTransportOptions).Debug("gosdn gnmi transport options")
-			d, err := NewDevice(name, uuid.Nil, csbiTransportOptions, csbi)
-			if err != nil {
-				panic(err)
-			}
-			d.(*CsbiDevice).UUID = id
-			ch <- store.DeviceDetails{TransportOption: opt}
-			if err := pnd.devices.Add(d, d.Name()); err != nil {
-				panic(err)
-			}
+	defer tickatus.Stop()
+	select {
+	case <-tickatus.C:
+		log.WithFields(log.Fields{
+			"id":  d.Id,
+			"err": ctx.Err(),
+		}).Error("csbi handshake timed out")
+	case deviceDetails := <-ch:
+		log.Infof("syn from csbi %v", deviceDetails.ID)
+		id, err := uuid.Parse(deviceDetails.ID)
+		if err != nil {
+			return err
+		}
+		csbiTransportOptions := &tpb.TransportOption{
+			Address:         deviceDetails.Address,
+			Username:        opt.Username,
+			Password:        opt.Password,
+			Tls:             opt.Tls,
+			Type:            opt.Type,
+			TransportOption: opt.TransportOption,
+		}
+		log.WithField("transport option", csbiTransportOptions).Debug("gosdn gnmi transport options")
+		d, err := NewDevice(name, uuid.Nil, csbiTransportOptions, csbi)
+		if err != nil {
+			return err
+		}
+		d.(*CsbiDevice).UUID = id
+		ch <- store.DeviceDetails{TransportOption: opt}
+		if err := pnd.devices.Add(d, d.Name()); err != nil {
+			return err
 		}
-		pnd.callback(id, nil)
-		close(ch)
-	}()
+	}
+
 	return nil
 }
 
-- 
GitLab