Skip to content
Snippets Groups Projects
Commit dbd7e7ee authored by Kamil Trzcinski's avatar Kamil Trzcinski
Browse files

Refactor docker-machine calls

This reduces number of API calls using docker-machine, since some of the data are cached.

- Use `Exist` instead of `CanConnect` when acquiring machine
- Cache `DockerCredentials` to not call it every time, just check the machine state with `CanConnect` (consumes one request per build)
- Refactor `Use` to unconditionally allocate machine if we can't connect to existing one
parent 60890166
No related tags found
No related merge requests found
......@@ -3,3 +3,6 @@ package machine
import "time"
var provisionRetryInterval = time.Second
var removalRetryInterval = time.Minute
var useMachineRetries = 10
var useMachineRetryInterval = 10 * time.Second
......@@ -8,15 +8,17 @@ import (
"github.com/Sirupsen/logrus"
"gitlab.com/gitlab-org/gitlab-ci-multi-runner/helpers"
"gitlab.com/gitlab-org/gitlab-ci-multi-runner/helpers/docker"
)
type machineDetails struct {
Name string
Created time.Time `yaml:"-"`
Used time.Time `yaml:"-"`
UsedCount int
State machineState
Reason string
Name string
Created time.Time `yaml:"-"`
Used time.Time `yaml:"-"`
UsedCount int
State machineState
Reason string
Credentials *docker_helpers.DockerCredentials
}
func (m *machineDetails) isUsed() bool {
......
......@@ -12,6 +12,7 @@ import (
type machineExecutor struct {
provider *machineProvider
executor common.Executor
build *common.Build
data common.ExecutorData
config common.RunnerConfig
}
......@@ -24,6 +25,8 @@ func (e *machineExecutor) Shell() *common.ShellScriptInfo {
}
func (e *machineExecutor) Prepare(globalConfig *common.Config, config *common.RunnerConfig, build *common.Build) (err error) {
e.build = build
// Use the machine
e.config, e.data, err = e.provider.Use(config, build.ExecutorData)
if err != nil {
......@@ -32,9 +35,7 @@ func (e *machineExecutor) Prepare(globalConfig *common.Config, config *common.Ru
// TODO: Currently the docker-machine doesn't support multiple builds
build.ProjectRunnerID = 0
if details, _ := build.ExecutorData.(*machineDetails); details != nil {
build.Hostname = details.Name
} else if details, _ := e.data.(*machineDetails); details != nil {
if details, _ := e.data.(*machineDetails); details != nil {
build.Hostname = details.Name
}
......@@ -66,7 +67,7 @@ func (e *machineExecutor) Cleanup() {
}
// Release allocated machine
if e.data != "" {
if e.data != nil && e.data != e.build.ExecutorData {
e.provider.Release(&e.config, e.data)
e.data = nil
}
......
......@@ -85,10 +85,8 @@ func (m *machineProvider) findFreeMachine(machines ...string) (details *machineD
continue
}
// Check if node is running
canConnect := m.machine.CanConnect(name)
if !canConnect {
m.remove(name, "machine is unavailable")
if !m.machine.Exist(name) {
m.remove(name, "machine does not exists")
continue
}
return details
......@@ -97,28 +95,29 @@ func (m *machineProvider) findFreeMachine(machines ...string) (details *machineD
return nil
}
func (m *machineProvider) useMachine(config *common.RunnerConfig) (details *machineDetails, err error) {
func (m *machineProvider) findAndUseMachine(config *common.RunnerConfig) (dc docker_helpers.DockerCredentials, details *machineDetails, err error) {
machines, err := m.loadMachines(config)
if err != nil {
return
}
details = m.findFreeMachine(machines...)
if details == nil {
var errCh chan error
details, errCh = m.create(config, machineStateAcquired)
err = <-errCh
if details != nil {
dc, err = m.useCredentials(config, details)
if err != nil {
details = nil
}
}
return
}
func (m *machineProvider) retryUseMachine(config *common.RunnerConfig) (details *machineDetails, err error) {
func (m *machineProvider) retryFindAndUseMachine(config *common.RunnerConfig) (dc docker_helpers.DockerCredentials, details *machineDetails, err error) {
// Try to find a machine
for i := 0; i < 3; i++ {
details, err = m.useMachine(config)
for i := 0; i < useMachineRetries; i++ {
dc, details, err = m.findAndUseMachine(config)
if err == nil {
break
}
time.Sleep(provisionRetryInterval)
time.Sleep(useMachineRetryInterval)
}
return
}
......@@ -138,7 +137,7 @@ func (m *machineProvider) finalizeRemoval(details *machineDetails) {
if err == nil {
break
}
time.Sleep(30 * time.Second)
time.Sleep(removalRetryInterval)
logrus.WithField("name", details.Name).
WithField("created", time.Since(details.Created)).
WithField("used", time.Since(details.Used)).
......@@ -174,6 +173,9 @@ func (m *machineProvider) remove(machineName string, reason ...interface{}) {
}
func (m *machineProvider) updateMachine(config *common.RunnerConfig, data *machinesData, details *machineDetails) error {
m.lock.RLock()
defer m.lock.RUnlock()
if details.State != machineStateIdle {
return nil
}
......@@ -233,6 +235,26 @@ func (m *machineProvider) loadMachines(config *common.RunnerConfig) ([]string, e
return m.machine.List(machineFilter(config))
}
func (m *machineProvider) useCredentials(config *common.RunnerConfig, details *machineDetails) (dc docker_helpers.DockerCredentials, err error) {
if details.Credentials != nil {
if m.machine.CanConnect(details.Name) {
dc = *details.Credentials
} else {
err = fmt.Errorf("can't connect to: %s", details.Name)
m.Release(config, details)
}
return
}
dc, err = m.machine.Credentials(details.Name)
if err == nil {
details.Credentials = &dc
} else {
m.Release(config, details)
}
return
}
func (m *machineProvider) Acquire(config *common.RunnerConfig) (data common.ExecutorData, err error) {
if config.Machine == nil || config.Machine.MachineName == "" {
err = fmt.Errorf("Missing Machine options")
......@@ -253,16 +275,17 @@ func (m *machineProvider) Acquire(config *common.RunnerConfig) (data common.Exec
// Pre-create machines
m.createMachines(config, &machinesData)
m.acquireLock.Unlock()
logrus.WithFields(machinesData.Fields()).
WithField("runner", config.ShortDescription()).
WithField("minIdleCount", config.Machine.IdleCount).
WithField("maxMachines", config.Limit).
WithField("time", time.Now()).
Debugln("Docker Machine Details")
machinesData.writeDebugInformation()
m.acquireLock.Unlock()
// Try to find a free machine
details := m.findFreeMachine(machines...)
if details != nil {
......@@ -271,32 +294,55 @@ func (m *machineProvider) Acquire(config *common.RunnerConfig) (data common.Exec
}
// If we have a free machines we can process a build
if config.Machine.IdleCount != 0 && machinesData.Idle == 0 {
if config.Machine.IdleCount != 0 {
err = errors.New("No free machines that can process builds")
}
return
}
func (m *machineProvider) createAndUseMachine(config *common.RunnerConfig) (dc docker_helpers.DockerCredentials, details *machineDetails, err error) {
var errCh chan error
details, errCh = m.create(config, machineStateAcquired)
err = <-errCh
if err != nil {
return
}
dc, err = m.useCredentials(config, details)
if err != nil {
details = nil
}
return
}
func (m *machineProvider) Use(config *common.RunnerConfig, data common.ExecutorData) (newConfig common.RunnerConfig, newData common.ExecutorData, err error) {
var dc docker_helpers.DockerCredentials
// Find a new machine
details, _ := data.(*machineDetails)
// Get machine credentials
if details != nil {
dc, err = m.useCredentials(config, details)
if err != nil {
details = nil
}
}
// Try to find an existing machine
if details == nil {
details, err = m.retryUseMachine(config)
dc, details, err = m.retryFindAndUseMachine(config)
if err != nil {
return
}
// Return details only if this is a new instance
newData = details
}
// Get machine credentials
dc, err := m.machine.Credentials(details.Name)
if err != nil {
if newData != nil {
m.Release(config, newData)
// Try to create a new machine
if details == nil {
dc, details, err = m.createAndUseMachine(config)
if err != nil {
return
}
return
}
// Create shallow copy of config and store in it docker credentials
......@@ -306,6 +352,7 @@ func (m *machineProvider) Use(config *common.RunnerConfig, data common.ExecutorD
*newConfig.Docker = *config.Docker
}
newConfig.Docker.DockerCredentials = dc
newData = details
// Mark machine as used
details.State = machineStateUsed
......
......@@ -106,6 +106,9 @@ func (m *testMachine) Remove(name string) error {
}
func (m *testMachine) Exist(name string) bool {
if strings.Contains(name, "no-can-connect") {
return false
}
for _, machine := range m.machines {
if machine == name {
return true
......@@ -190,9 +193,9 @@ func assertTotalMachines(t *testing.T, p *machineProvider, expected int, msgAndA
return false
}
func testMachineProvider(machine ...string) (*machineProvider, *testMachine) {
func testMachineProvider(machines ...string) (*machineProvider, *testMachine) {
t := &testMachine{
machines: machine,
machines: machines,
}
p := &machineProvider{
details: make(machinesDetails),
......@@ -222,7 +225,7 @@ func TestMachineDetails(t *testing.T) {
}
func TestMachineFindFree(t *testing.T) {
p, tm := testMachineProvider("no-can-connect")
p, _ := testMachineProvider("no-can-connect", "machine1", "machine2")
d1 := p.findFreeMachine()
assert.Nil(t, d1, "no machines, return nil")
......@@ -236,7 +239,6 @@ func TestMachineFindFree(t *testing.T) {
assert.NotNil(t, d4, "acquire a new machine")
assert.NotEqual(t, d2, d4, "and it's a different machine")
assert.Len(t, tm.machines, 1, "has one machine")
d5 := p.findFreeMachine("machine1", "no-can-connect")
assert.Nil(t, d5, "fails to acquire machine to which he can't connect")
}
......@@ -270,33 +272,62 @@ func TestMachineUse(t *testing.T) {
p, _ := testMachineProvider("machine1")
d1, err := p.useMachine(machineDefaultConfig)
_, d1, err := p.findAndUseMachine(machineDefaultConfig)
assert.NotNil(t, d1)
assert.NoError(t, err)
assert.Equal(t, machineStateAcquired, d1.State)
assert.Equal(t, "machine1", d1.Name, "finds a free machine1")
d2, err := p.useMachine(machineDefaultConfig)
assert.NotNil(t, d2)
assert.NoError(t, err)
assert.Equal(t, machineStateAcquired, d2.State)
assert.NotEqual(t, "machine1", d2.Name, "creates a new machine")
_, err = p.useMachine(machineProvisionFail)
assert.Error(t, err, "fails to create a new machine")
_, d2, err := p.findAndUseMachine(machineProvisionFail)
assert.Nil(t, d2, "fails to find a machine")
assert.NoError(t, err, "this is not an error")
}
func TestMachineTestRetry(t *testing.T) {
provisionRetryInterval = 0
useMachineRetryInterval = 0
useMachineRetries = 3
p, _ := testMachineProvider()
_, err := p.useMachine(machineSecondFail)
assert.Error(t, err, "fails to create a new machine")
_, d, err := p.retryFindAndUseMachine(machineSecondFail)
assert.Nil(t, d, "fails to find a free machine")
assert.NoError(t, err, "this is not an error")
}
p, _ = testMachineProvider()
d1, err := p.retryUseMachine(machineSecondFail)
assert.NoError(t, err, "after replying the same test scenario and using retry it succeeds")
assert.Equal(t, machineStateAcquired, d1.State)
func TestUseCredentials(t *testing.T) {
p, _ := testMachineProvider()
details := p.machineDetails("machine1", true)
_, err := p.useCredentials(nil, details)
assert.NoError(t, err, "successfully gets credentials")
_, err = p.useCredentials(nil, details)
assert.NoError(t, err, "successfully gets credentials for second time")
details = p.machineDetails("no-connect", true)
_, err = p.useCredentials(nil, details)
assert.Error(t, err, "fails to get credentials when can connect to machine")
assert.Equal(t, machineStateIdle, details.State, "machine should be released")
details = p.machineDetails("no-can-connect", true)
_, err = p.useCredentials(nil, details)
assert.NoError(t, err, "successfully gets credentials for the first time")
assert.Equal(t, machineStateAcquired, details.State, "machine should be acquired")
_, err = p.useCredentials(nil, details)
assert.Error(t, err, "fails to get credentials when cannnot connect")
assert.Equal(t, machineStateIdle, details.State, "machine should be released")
}
func TestCreateAndUseMachine(t *testing.T) {
p, _ := testMachineProvider()
_, d, err := p.createAndUseMachine(machineDefaultConfig)
assert.NoError(t, err, "succesfully creates machine")
assert.Equal(t, machineStateAcquired, d.State, "machine should be acquired")
_, d, err = p.createAndUseMachine(machineNoConnect)
assert.Error(t, err, "fails to create a machine")
}
func TestMachineAcquireAndRelease(t *testing.T) {
......@@ -396,7 +427,7 @@ func TestMachineMaxBuilds(t *testing.T) {
_, nd, err := p.Use(config, d)
assert.NoError(t, err)
assert.Nil(t, nd, "we passed the data, we should not get the data now")
assert.Equal(t, d, nd, "we passed the data, we should get the same data now")
p.Release(config, d)
......@@ -457,3 +488,44 @@ func TestMachineUseOnDemand(t *testing.T) {
assert.Error(t, err, "fail to create a new machine on connect")
assertTotalMachines(t, p, 3, "it fails on no-connect, but we leave the machine created")
}
func TestMachineCanConnectFailed(t *testing.T) {
provisionRetryInterval = 0
removalRetryInterval = 0
useMachineRetryInterval = 0
p, _ := testMachineProvider()
d := p.machineDetails("no-connect", true)
_, nd, err := p.Use(machineProvisionFail, d)
assert.Error(t, err, "it fails to assign a machine")
assert.Nil(t, nd)
assert.Equal(t, machineStateIdle, d.State, "releases invalid machine")
}
func TestMachineUseCreatesMachine(t *testing.T) {
provisionRetryInterval = 0
removalRetryInterval = 0
useMachineRetryInterval = 0
p, _ := testMachineProvider()
d := p.machineDetails("no-connect", true)
_, nd, err := p.Use(machineDefaultConfig, d)
assert.NoError(t, err, "it creates a new machine")
assert.NotNil(t, nd)
assert.Equal(t, machineStateIdle, d.State, "releases invalid machine")
}
func TestMachineUseFindsANewMachine(t *testing.T) {
provisionRetryInterval = 0
removalRetryInterval = 0
useMachineRetryInterval = 0
p, _ := testMachineProvider("machine2")
d := p.machineDetails("no-connect", true)
_, nd, err := p.Use(machineDefaultConfig, d)
assert.NoError(t, err, "it find an existing machine")
assert.NotNil(t, nd)
d2 := p.machineDetails("machine2", false)
assert.Equal(t, nd, d2, "it should be machine2")
assert.Equal(t, machineStateIdle, d.State, "releases invalid machine")
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment