Skip to content
Snippets Groups Projects
Commit dbd7e7ee authored by Kamil Trzcinski's avatar Kamil Trzcinski
Browse files

Refactor docker-machine calls

This reduces number of API calls using docker-machine, since some of the data are cached.

- Use `Exist` instead of `CanConnect` when acquiring machine
- Cache `DockerCredentials` to not call it every time, just check the machine state with `CanConnect` (consumes one request per build)
- Refactor `Use` to unconditionally allocate machine if we can't connect to existing one
parent 60890166
No related tags found
No related merge requests found
...@@ -3,3 +3,6 @@ package machine ...@@ -3,3 +3,6 @@ package machine
import "time" import "time"
var provisionRetryInterval = time.Second var provisionRetryInterval = time.Second
var removalRetryInterval = time.Minute
var useMachineRetries = 10
var useMachineRetryInterval = 10 * time.Second
...@@ -8,15 +8,17 @@ import ( ...@@ -8,15 +8,17 @@ import (
"github.com/Sirupsen/logrus" "github.com/Sirupsen/logrus"
"gitlab.com/gitlab-org/gitlab-ci-multi-runner/helpers" "gitlab.com/gitlab-org/gitlab-ci-multi-runner/helpers"
"gitlab.com/gitlab-org/gitlab-ci-multi-runner/helpers/docker"
) )
type machineDetails struct { type machineDetails struct {
Name string Name string
Created time.Time `yaml:"-"` Created time.Time `yaml:"-"`
Used time.Time `yaml:"-"` Used time.Time `yaml:"-"`
UsedCount int UsedCount int
State machineState State machineState
Reason string Reason string
Credentials *docker_helpers.DockerCredentials
} }
func (m *machineDetails) isUsed() bool { func (m *machineDetails) isUsed() bool {
......
...@@ -12,6 +12,7 @@ import ( ...@@ -12,6 +12,7 @@ import (
type machineExecutor struct { type machineExecutor struct {
provider *machineProvider provider *machineProvider
executor common.Executor executor common.Executor
build *common.Build
data common.ExecutorData data common.ExecutorData
config common.RunnerConfig config common.RunnerConfig
} }
...@@ -24,6 +25,8 @@ func (e *machineExecutor) Shell() *common.ShellScriptInfo { ...@@ -24,6 +25,8 @@ func (e *machineExecutor) Shell() *common.ShellScriptInfo {
} }
func (e *machineExecutor) Prepare(globalConfig *common.Config, config *common.RunnerConfig, build *common.Build) (err error) { func (e *machineExecutor) Prepare(globalConfig *common.Config, config *common.RunnerConfig, build *common.Build) (err error) {
e.build = build
// Use the machine // Use the machine
e.config, e.data, err = e.provider.Use(config, build.ExecutorData) e.config, e.data, err = e.provider.Use(config, build.ExecutorData)
if err != nil { if err != nil {
...@@ -32,9 +35,7 @@ func (e *machineExecutor) Prepare(globalConfig *common.Config, config *common.Ru ...@@ -32,9 +35,7 @@ func (e *machineExecutor) Prepare(globalConfig *common.Config, config *common.Ru
// TODO: Currently the docker-machine doesn't support multiple builds // TODO: Currently the docker-machine doesn't support multiple builds
build.ProjectRunnerID = 0 build.ProjectRunnerID = 0
if details, _ := build.ExecutorData.(*machineDetails); details != nil { if details, _ := e.data.(*machineDetails); details != nil {
build.Hostname = details.Name
} else if details, _ := e.data.(*machineDetails); details != nil {
build.Hostname = details.Name build.Hostname = details.Name
} }
...@@ -66,7 +67,7 @@ func (e *machineExecutor) Cleanup() { ...@@ -66,7 +67,7 @@ func (e *machineExecutor) Cleanup() {
} }
// Release allocated machine // Release allocated machine
if e.data != "" { if e.data != nil && e.data != e.build.ExecutorData {
e.provider.Release(&e.config, e.data) e.provider.Release(&e.config, e.data)
e.data = nil e.data = nil
} }
......
...@@ -85,10 +85,8 @@ func (m *machineProvider) findFreeMachine(machines ...string) (details *machineD ...@@ -85,10 +85,8 @@ func (m *machineProvider) findFreeMachine(machines ...string) (details *machineD
continue continue
} }
// Check if node is running if !m.machine.Exist(name) {
canConnect := m.machine.CanConnect(name) m.remove(name, "machine does not exists")
if !canConnect {
m.remove(name, "machine is unavailable")
continue continue
} }
return details return details
...@@ -97,28 +95,29 @@ func (m *machineProvider) findFreeMachine(machines ...string) (details *machineD ...@@ -97,28 +95,29 @@ func (m *machineProvider) findFreeMachine(machines ...string) (details *machineD
return nil return nil
} }
func (m *machineProvider) useMachine(config *common.RunnerConfig) (details *machineDetails, err error) { func (m *machineProvider) findAndUseMachine(config *common.RunnerConfig) (dc docker_helpers.DockerCredentials, details *machineDetails, err error) {
machines, err := m.loadMachines(config) machines, err := m.loadMachines(config)
if err != nil { if err != nil {
return return
} }
details = m.findFreeMachine(machines...) details = m.findFreeMachine(machines...)
if details == nil { if details != nil {
var errCh chan error dc, err = m.useCredentials(config, details)
details, errCh = m.create(config, machineStateAcquired) if err != nil {
err = <-errCh details = nil
}
} }
return return
} }
func (m *machineProvider) retryUseMachine(config *common.RunnerConfig) (details *machineDetails, err error) { func (m *machineProvider) retryFindAndUseMachine(config *common.RunnerConfig) (dc docker_helpers.DockerCredentials, details *machineDetails, err error) {
// Try to find a machine // Try to find a machine
for i := 0; i < 3; i++ { for i := 0; i < useMachineRetries; i++ {
details, err = m.useMachine(config) dc, details, err = m.findAndUseMachine(config)
if err == nil { if err == nil {
break break
} }
time.Sleep(provisionRetryInterval) time.Sleep(useMachineRetryInterval)
} }
return return
} }
...@@ -138,7 +137,7 @@ func (m *machineProvider) finalizeRemoval(details *machineDetails) { ...@@ -138,7 +137,7 @@ func (m *machineProvider) finalizeRemoval(details *machineDetails) {
if err == nil { if err == nil {
break break
} }
time.Sleep(30 * time.Second) time.Sleep(removalRetryInterval)
logrus.WithField("name", details.Name). logrus.WithField("name", details.Name).
WithField("created", time.Since(details.Created)). WithField("created", time.Since(details.Created)).
WithField("used", time.Since(details.Used)). WithField("used", time.Since(details.Used)).
...@@ -174,6 +173,9 @@ func (m *machineProvider) remove(machineName string, reason ...interface{}) { ...@@ -174,6 +173,9 @@ func (m *machineProvider) remove(machineName string, reason ...interface{}) {
} }
func (m *machineProvider) updateMachine(config *common.RunnerConfig, data *machinesData, details *machineDetails) error { func (m *machineProvider) updateMachine(config *common.RunnerConfig, data *machinesData, details *machineDetails) error {
m.lock.RLock()
defer m.lock.RUnlock()
if details.State != machineStateIdle { if details.State != machineStateIdle {
return nil return nil
} }
...@@ -233,6 +235,26 @@ func (m *machineProvider) loadMachines(config *common.RunnerConfig) ([]string, e ...@@ -233,6 +235,26 @@ func (m *machineProvider) loadMachines(config *common.RunnerConfig) ([]string, e
return m.machine.List(machineFilter(config)) return m.machine.List(machineFilter(config))
} }
func (m *machineProvider) useCredentials(config *common.RunnerConfig, details *machineDetails) (dc docker_helpers.DockerCredentials, err error) {
if details.Credentials != nil {
if m.machine.CanConnect(details.Name) {
dc = *details.Credentials
} else {
err = fmt.Errorf("can't connect to: %s", details.Name)
m.Release(config, details)
}
return
}
dc, err = m.machine.Credentials(details.Name)
if err == nil {
details.Credentials = &dc
} else {
m.Release(config, details)
}
return
}
func (m *machineProvider) Acquire(config *common.RunnerConfig) (data common.ExecutorData, err error) { func (m *machineProvider) Acquire(config *common.RunnerConfig) (data common.ExecutorData, err error) {
if config.Machine == nil || config.Machine.MachineName == "" { if config.Machine == nil || config.Machine.MachineName == "" {
err = fmt.Errorf("Missing Machine options") err = fmt.Errorf("Missing Machine options")
...@@ -253,16 +275,17 @@ func (m *machineProvider) Acquire(config *common.RunnerConfig) (data common.Exec ...@@ -253,16 +275,17 @@ func (m *machineProvider) Acquire(config *common.RunnerConfig) (data common.Exec
// Pre-create machines // Pre-create machines
m.createMachines(config, &machinesData) m.createMachines(config, &machinesData)
m.acquireLock.Unlock()
logrus.WithFields(machinesData.Fields()). logrus.WithFields(machinesData.Fields()).
WithField("runner", config.ShortDescription()). WithField("runner", config.ShortDescription()).
WithField("minIdleCount", config.Machine.IdleCount). WithField("minIdleCount", config.Machine.IdleCount).
WithField("maxMachines", config.Limit). WithField("maxMachines", config.Limit).
WithField("time", time.Now()). WithField("time", time.Now()).
Debugln("Docker Machine Details") Debugln("Docker Machine Details")
machinesData.writeDebugInformation() machinesData.writeDebugInformation()
m.acquireLock.Unlock()
// Try to find a free machine // Try to find a free machine
details := m.findFreeMachine(machines...) details := m.findFreeMachine(machines...)
if details != nil { if details != nil {
...@@ -271,32 +294,55 @@ func (m *machineProvider) Acquire(config *common.RunnerConfig) (data common.Exec ...@@ -271,32 +294,55 @@ func (m *machineProvider) Acquire(config *common.RunnerConfig) (data common.Exec
} }
// If we have a free machines we can process a build // If we have a free machines we can process a build
if config.Machine.IdleCount != 0 && machinesData.Idle == 0 { if config.Machine.IdleCount != 0 {
err = errors.New("No free machines that can process builds") err = errors.New("No free machines that can process builds")
} }
return return
} }
func (m *machineProvider) createAndUseMachine(config *common.RunnerConfig) (dc docker_helpers.DockerCredentials, details *machineDetails, err error) {
var errCh chan error
details, errCh = m.create(config, machineStateAcquired)
err = <-errCh
if err != nil {
return
}
dc, err = m.useCredentials(config, details)
if err != nil {
details = nil
}
return
}
func (m *machineProvider) Use(config *common.RunnerConfig, data common.ExecutorData) (newConfig common.RunnerConfig, newData common.ExecutorData, err error) { func (m *machineProvider) Use(config *common.RunnerConfig, data common.ExecutorData) (newConfig common.RunnerConfig, newData common.ExecutorData, err error) {
var dc docker_helpers.DockerCredentials
// Find a new machine // Find a new machine
details, _ := data.(*machineDetails) details, _ := data.(*machineDetails)
// Get machine credentials
if details != nil {
dc, err = m.useCredentials(config, details)
if err != nil {
details = nil
}
}
// Try to find an existing machine
if details == nil { if details == nil {
details, err = m.retryUseMachine(config) dc, details, err = m.retryFindAndUseMachine(config)
if err != nil { if err != nil {
return return
} }
// Return details only if this is a new instance
newData = details
} }
// Get machine credentials // Try to create a new machine
dc, err := m.machine.Credentials(details.Name) if details == nil {
if err != nil { dc, details, err = m.createAndUseMachine(config)
if newData != nil { if err != nil {
m.Release(config, newData) return
} }
return
} }
// Create shallow copy of config and store in it docker credentials // Create shallow copy of config and store in it docker credentials
...@@ -306,6 +352,7 @@ func (m *machineProvider) Use(config *common.RunnerConfig, data common.ExecutorD ...@@ -306,6 +352,7 @@ func (m *machineProvider) Use(config *common.RunnerConfig, data common.ExecutorD
*newConfig.Docker = *config.Docker *newConfig.Docker = *config.Docker
} }
newConfig.Docker.DockerCredentials = dc newConfig.Docker.DockerCredentials = dc
newData = details
// Mark machine as used // Mark machine as used
details.State = machineStateUsed details.State = machineStateUsed
......
...@@ -106,6 +106,9 @@ func (m *testMachine) Remove(name string) error { ...@@ -106,6 +106,9 @@ func (m *testMachine) Remove(name string) error {
} }
func (m *testMachine) Exist(name string) bool { func (m *testMachine) Exist(name string) bool {
if strings.Contains(name, "no-can-connect") {
return false
}
for _, machine := range m.machines { for _, machine := range m.machines {
if machine == name { if machine == name {
return true return true
...@@ -190,9 +193,9 @@ func assertTotalMachines(t *testing.T, p *machineProvider, expected int, msgAndA ...@@ -190,9 +193,9 @@ func assertTotalMachines(t *testing.T, p *machineProvider, expected int, msgAndA
return false return false
} }
func testMachineProvider(machine ...string) (*machineProvider, *testMachine) { func testMachineProvider(machines ...string) (*machineProvider, *testMachine) {
t := &testMachine{ t := &testMachine{
machines: machine, machines: machines,
} }
p := &machineProvider{ p := &machineProvider{
details: make(machinesDetails), details: make(machinesDetails),
...@@ -222,7 +225,7 @@ func TestMachineDetails(t *testing.T) { ...@@ -222,7 +225,7 @@ func TestMachineDetails(t *testing.T) {
} }
func TestMachineFindFree(t *testing.T) { func TestMachineFindFree(t *testing.T) {
p, tm := testMachineProvider("no-can-connect") p, _ := testMachineProvider("no-can-connect", "machine1", "machine2")
d1 := p.findFreeMachine() d1 := p.findFreeMachine()
assert.Nil(t, d1, "no machines, return nil") assert.Nil(t, d1, "no machines, return nil")
...@@ -236,7 +239,6 @@ func TestMachineFindFree(t *testing.T) { ...@@ -236,7 +239,6 @@ func TestMachineFindFree(t *testing.T) {
assert.NotNil(t, d4, "acquire a new machine") assert.NotNil(t, d4, "acquire a new machine")
assert.NotEqual(t, d2, d4, "and it's a different machine") assert.NotEqual(t, d2, d4, "and it's a different machine")
assert.Len(t, tm.machines, 1, "has one machine")
d5 := p.findFreeMachine("machine1", "no-can-connect") d5 := p.findFreeMachine("machine1", "no-can-connect")
assert.Nil(t, d5, "fails to acquire machine to which he can't connect") assert.Nil(t, d5, "fails to acquire machine to which he can't connect")
} }
...@@ -270,33 +272,62 @@ func TestMachineUse(t *testing.T) { ...@@ -270,33 +272,62 @@ func TestMachineUse(t *testing.T) {
p, _ := testMachineProvider("machine1") p, _ := testMachineProvider("machine1")
d1, err := p.useMachine(machineDefaultConfig) _, d1, err := p.findAndUseMachine(machineDefaultConfig)
assert.NotNil(t, d1) assert.NotNil(t, d1)
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, machineStateAcquired, d1.State) assert.Equal(t, machineStateAcquired, d1.State)
assert.Equal(t, "machine1", d1.Name, "finds a free machine1") assert.Equal(t, "machine1", d1.Name, "finds a free machine1")
d2, err := p.useMachine(machineDefaultConfig) _, d2, err := p.findAndUseMachine(machineProvisionFail)
assert.NotNil(t, d2) assert.Nil(t, d2, "fails to find a machine")
assert.NoError(t, err) assert.NoError(t, err, "this is not an error")
assert.Equal(t, machineStateAcquired, d2.State)
assert.NotEqual(t, "machine1", d2.Name, "creates a new machine")
_, err = p.useMachine(machineProvisionFail)
assert.Error(t, err, "fails to create a new machine")
} }
func TestMachineTestRetry(t *testing.T) { func TestMachineTestRetry(t *testing.T) {
provisionRetryInterval = 0 provisionRetryInterval = 0
useMachineRetryInterval = 0
useMachineRetries = 3
p, _ := testMachineProvider() p, _ := testMachineProvider()
_, err := p.useMachine(machineSecondFail) _, d, err := p.retryFindAndUseMachine(machineSecondFail)
assert.Error(t, err, "fails to create a new machine") assert.Nil(t, d, "fails to find a free machine")
assert.NoError(t, err, "this is not an error")
}
p, _ = testMachineProvider() func TestUseCredentials(t *testing.T) {
d1, err := p.retryUseMachine(machineSecondFail) p, _ := testMachineProvider()
assert.NoError(t, err, "after replying the same test scenario and using retry it succeeds")
assert.Equal(t, machineStateAcquired, d1.State) details := p.machineDetails("machine1", true)
_, err := p.useCredentials(nil, details)
assert.NoError(t, err, "successfully gets credentials")
_, err = p.useCredentials(nil, details)
assert.NoError(t, err, "successfully gets credentials for second time")
details = p.machineDetails("no-connect", true)
_, err = p.useCredentials(nil, details)
assert.Error(t, err, "fails to get credentials when can connect to machine")
assert.Equal(t, machineStateIdle, details.State, "machine should be released")
details = p.machineDetails("no-can-connect", true)
_, err = p.useCredentials(nil, details)
assert.NoError(t, err, "successfully gets credentials for the first time")
assert.Equal(t, machineStateAcquired, details.State, "machine should be acquired")
_, err = p.useCredentials(nil, details)
assert.Error(t, err, "fails to get credentials when cannnot connect")
assert.Equal(t, machineStateIdle, details.State, "machine should be released")
}
func TestCreateAndUseMachine(t *testing.T) {
p, _ := testMachineProvider()
_, d, err := p.createAndUseMachine(machineDefaultConfig)
assert.NoError(t, err, "succesfully creates machine")
assert.Equal(t, machineStateAcquired, d.State, "machine should be acquired")
_, d, err = p.createAndUseMachine(machineNoConnect)
assert.Error(t, err, "fails to create a machine")
} }
func TestMachineAcquireAndRelease(t *testing.T) { func TestMachineAcquireAndRelease(t *testing.T) {
...@@ -396,7 +427,7 @@ func TestMachineMaxBuilds(t *testing.T) { ...@@ -396,7 +427,7 @@ func TestMachineMaxBuilds(t *testing.T) {
_, nd, err := p.Use(config, d) _, nd, err := p.Use(config, d)
assert.NoError(t, err) assert.NoError(t, err)
assert.Nil(t, nd, "we passed the data, we should not get the data now") assert.Equal(t, d, nd, "we passed the data, we should get the same data now")
p.Release(config, d) p.Release(config, d)
...@@ -457,3 +488,44 @@ func TestMachineUseOnDemand(t *testing.T) { ...@@ -457,3 +488,44 @@ func TestMachineUseOnDemand(t *testing.T) {
assert.Error(t, err, "fail to create a new machine on connect") assert.Error(t, err, "fail to create a new machine on connect")
assertTotalMachines(t, p, 3, "it fails on no-connect, but we leave the machine created") assertTotalMachines(t, p, 3, "it fails on no-connect, but we leave the machine created")
} }
func TestMachineCanConnectFailed(t *testing.T) {
provisionRetryInterval = 0
removalRetryInterval = 0
useMachineRetryInterval = 0
p, _ := testMachineProvider()
d := p.machineDetails("no-connect", true)
_, nd, err := p.Use(machineProvisionFail, d)
assert.Error(t, err, "it fails to assign a machine")
assert.Nil(t, nd)
assert.Equal(t, machineStateIdle, d.State, "releases invalid machine")
}
func TestMachineUseCreatesMachine(t *testing.T) {
provisionRetryInterval = 0
removalRetryInterval = 0
useMachineRetryInterval = 0
p, _ := testMachineProvider()
d := p.machineDetails("no-connect", true)
_, nd, err := p.Use(machineDefaultConfig, d)
assert.NoError(t, err, "it creates a new machine")
assert.NotNil(t, nd)
assert.Equal(t, machineStateIdle, d.State, "releases invalid machine")
}
func TestMachineUseFindsANewMachine(t *testing.T) {
provisionRetryInterval = 0
removalRetryInterval = 0
useMachineRetryInterval = 0
p, _ := testMachineProvider("machine2")
d := p.machineDetails("no-connect", true)
_, nd, err := p.Use(machineDefaultConfig, d)
assert.NoError(t, err, "it find an existing machine")
assert.NotNil(t, nd)
d2 := p.machineDetails("machine2", false)
assert.Equal(t, nd, d2, "it should be machine2")
assert.Equal(t, machineStateIdle, d.State, "releases invalid machine")
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment