/
main.go
378 lines (325 loc) · 14 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
// Copyright 2018-2020 Authors of Cilium
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"flag"
"fmt"
"os"
"os/signal"
"syscall"
"time"
"github.com/cilium/cilium/pkg/defaults"
"github.com/cilium/cilium/pkg/k8s"
clientset "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned"
"github.com/cilium/cilium/pkg/k8s/types"
k8sversion "github.com/cilium/cilium/pkg/k8s/version"
"github.com/cilium/cilium/pkg/kvstore"
"github.com/cilium/cilium/pkg/logging"
"github.com/cilium/cilium/pkg/logging/logfields"
"github.com/cilium/cilium/pkg/option"
"github.com/cilium/cilium/pkg/version"
gops "github.com/google/gops/agent"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/spf13/cobra/doc"
"github.com/spf13/viper"
"google.golang.org/grpc"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/klog"
)
var (
	// log is the package logger, tagged with the "cilium-operator" subsystem.
	log = logging.DefaultLogger.WithField(logfields.LogSubsys, "cilium-operator")

	// rootCmd is the cobra command executed by main. Its Run hook hands the
	// command to runOperator; positional args are not used.
	rootCmd = &cobra.Command{
		Use:   "cilium-operator",
		Short: "Run the cilium-operator",
		Run: func(cmd *cobra.Command, args []string) {
			runOperator(cmd)
		},
	}

	// k8sAPIServer is bound to the --k8s-api-server flag in init.
	k8sAPIServer string
	// k8sKubeConfigPath is bound to the --k8s-kubeconfig-path flag in init.
	k8sKubeConfigPath string
	// kvStore is the kvstore type; runOperator fills it from viper
	// (option.KVStore). An empty value makes kvstoreEnabled return false.
	kvStore string
	// kvStoreOpts backs the option.KVStoreOpt flag and may be replaced in
	// runOperator by the map viper returns for the same option.
	kvStoreOpts = make(map[string]string)
	// apiServerPort is bound to the --api-server-port flag (default 9234) and
	// is used by getAPIServerAddr.
	apiServerPort uint16
	// shutdownSignal is closed by the signal-handling goroutine started in
	// main; runOperator blocks on it before returning.
	shutdownSignal = make(chan struct{})
	// synchronizeServices is bound to the --synchronize-k8s-services flag.
	synchronizeServices bool
	// enableCepGC is bound to the --cilium-endpoint-gc flag.
	enableCepGC bool
	// synchronizeNodes is bound to the --synchronize-k8s-nodes flag.
	synchronizeNodes bool
	// enableMetrics is bound to the --enable-metrics flag.
	enableMetrics bool
	// metricsAddress is bound to the --metrics-address flag (default ":6942").
	metricsAddress string
	// eniParallelWorkers is bound to the --eni-parallel-workers flag.
	eniParallelWorkers int64
	// enableENI is set in runOperator: true when the IPAM option equals
	// option.IPAMENI.
	enableENI bool
	// k8sIdentityGCInterval is not referenced in the visible part of this
	// file. NOTE(review): confirm whether another file in the package uses it.
	k8sIdentityGCInterval time.Duration
	// k8sIdentityHeartbeatTimeout is bound to the --identity-heartbeat-timeout
	// flag (default 15 minutes).
	k8sIdentityHeartbeatTimeout time.Duration
	// ciliumK8sClient is assigned from k8s.CiliumClient() in runOperator after
	// k8s.Init succeeds.
	ciliumK8sClient clientset.Interface
	// cmdRefDir is bound to the hidden --cmdref flag; when non-empty,
	// runOperator generates the command reference there and exits.
	cmdRefDir string
)
// main installs the SIGINT/SIGTERM handler, starts the gops agent and then
// executes the root command. Any startup failure is printed to stdout and the
// process exits with status -1.
func main() {
	// fail prints its arguments on one line and terminates the process.
	fail := func(msg ...interface{}) {
		fmt.Println(msg...)
		os.Exit(-1)
	}

	// On the first termination signal, shut down gops and release everyone
	// waiting on shutdownSignal.
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		<-sigCh
		gops.Close()
		close(shutdownSignal)
	}()

	// Open socket for using gops to get stacktraces of the agent.
	if err := gops.Listen(gops.Options{}); err != nil {
		fail(fmt.Sprintf("unable to start gops: %s", err))
	}

	if err := rootCmd.Execute(); err != nil {
		fail(err)
	}
}
// init registers initConfig with cobra, declares every command-line flag of
// the root command, binds the flags to viper and prepares klog's flags.
//
// The statements form an ordered sequence of registrations on global state
// (the root command's flag set, viper, and the standard flag package); keep
// the order when editing.
func init() {
	cobra.OnInitialize(initConfig)

	flags := rootCmd.Flags()
	flags.Bool("version", false, "Print version information")

	// Cluster identity.
	flags.Int(option.ClusterIDName, 0, "Unique identifier of the cluster")
	option.BindEnv(option.ClusterIDName)
	flags.String(option.ClusterName, defaults.ClusterName, "Name of the cluster")
	option.BindEnv(option.ClusterName)

	flags.BoolP("debug", "D", false, "Enable debugging mode")

	// Kubernetes API access.
	flags.StringVar(&k8sAPIServer, "k8s-api-server", "", "Kubernetes api address server (for https use --k8s-kubeconfig-path instead)")
	flags.StringVar(&k8sKubeConfigPath, "k8s-kubeconfig-path", "", "Absolute path of the kubernetes kubeconfig file")

	// Key-value store.
	flags.String(option.KVStore, "", "Key-value store type")
	option.BindEnv(option.KVStore)
	flags.Var(option.NewNamedMapOptions(option.KVStoreOpt, &kvStoreOpts, nil), option.KVStoreOpt, "Key-value store options")
	option.BindEnv(option.KVStoreOpt)

	flags.Uint16Var(&apiServerPort, "api-server-port", 9234, "Port on which the operator should serve API requests")

	// IPAM / AWS ENI.
	flags.String(option.IPAM, "", "Backend to use for IPAM")
	option.BindEnv(option.IPAM)
	flags.Bool(option.AwsReleaseExcessIps, false, "Enable releasing excess free IP addresses from AWS ENI.")
	option.BindEnv(option.AwsReleaseExcessIps)

	// Metrics.
	flags.BoolVar(&enableMetrics, "enable-metrics", false, "Enable Prometheus metrics")
	flags.StringVar(&metricsAddress, "metrics-address", ":6942", "Address to serve Prometheus metrics")

	// Synchronization and garbage collection. Several targets below
	// (ciliumEndpointGCInterval, identityAllocationMode, identityGCInterval,
	// kvNodeGCInterval) are not declared in this file's var block and must be
	// declared elsewhere in the package.
	flags.BoolVar(&synchronizeServices, "synchronize-k8s-services", true, "Synchronize Kubernetes services to kvstore")
	flags.BoolVar(&synchronizeNodes, "synchronize-k8s-nodes", true, "Synchronize Kubernetes nodes to kvstore and perform CNP GC")
	flags.DurationVar(&k8sIdentityHeartbeatTimeout, "identity-heartbeat-timeout", 15*time.Minute, "Timeout after which identity expires on lack of heartbeat")
	flags.BoolVar(&enableCepGC, "cilium-endpoint-gc", true, "Enable CiliumEndpoint garbage collector")
	flags.DurationVar(&ciliumEndpointGCInterval, "cilium-endpoint-gc-interval", time.Minute*30, "GC interval for cilium endpoints")
	flags.StringVar(&identityAllocationMode, option.IdentityAllocationMode, option.IdentityAllocationModeKVstore, "Method to use for identity allocation")
	option.BindEnv(option.IdentityAllocationMode)
	flags.DurationVar(&identityGCInterval, "identity-gc-interval", defaults.KVstoreLeaseTTL, "GC interval for security identities")
	flags.DurationVar(&kvNodeGCInterval, "nodes-gc-interval", time.Minute*2, "GC interval for nodes store in the kvstore")
	flags.Int64Var(&eniParallelWorkers, "eni-parallel-workers", 50, "Maximum number of parallel workers used by ENI allocator")

	// The namespace flag is registered but hidden from --help.
	flags.String(option.K8sNamespaceName, "", "Name of the Kubernetes namespace in which Cilium Operator is deployed in")
	flags.MarkHidden(option.K8sNamespaceName)
	option.BindEnv(option.K8sNamespaceName)

	flags.IntVar(&unmanagedKubeDnsWatcherInterval, "unmanaged-pod-watcher-interval", 15, "Interval to check for unmanaged kube-dns pods (0 to disable)")

	// Client rate limits. runOperator reads these back through viper.
	flags.Int(option.AWSClientBurst, 4, "Burst value allowed for the AWS client used by the AWS ENI IPAM")
	flags.Float64(option.AWSClientQPSLimit, 20.0, "Queries per second limit for the AWS client used by the AWS ENI IPAM")
	flags.Float32(option.K8sClientQPSLimit, defaults.K8sClientQPSLimit, "Queries per second limit for the K8s client")
	flags.Int(option.K8sClientBurst, defaults.K8sClientBurst, "Burst value allowed for the K8s client")

	// We need to obtain from the Cilium ConfigMap whether the
	// CiliumEndpointCRD option is enabled or disabled. This option is marked
	// as hidden because the Cilium Endpoint CRD controller is not in this
	// program, and printing it in operator --help could confuse users.
	flags.Bool(option.DisableCiliumEndpointCRDName, false, "")
	flags.MarkHidden(option.DisableCiliumEndpointCRDName)
	option.BindEnv(option.DisableCiliumEndpointCRDName)

	flags.BoolVar(&enableCNPNodeStatusGC, "cnp-node-status-gc", true, "Enable CiliumNetworkPolicy Status garbage collection for nodes which have been removed from the cluster")
	flags.DurationVar(&ciliumCNPNodeStatusGCInterval, "cnp-node-status-gc-interval", time.Minute*2, "GC interval for nodes which have been removed from the cluster in CiliumNetworkPolicy Status")

	// Hidden flag: when set, runOperator writes the command reference to this
	// directory and exits.
	flags.StringVar(&cmdRefDir, "cmdref", "", "Path to cmdref output directory")
	flags.MarkHidden("cmdref")

	// Bind every flag declared above to viper. The returned error is
	// discarded here.
	viper.BindPFlags(flags)

	// Make sure that klog logging variables are initialized so that we can
	// update them from this file.
	klog.InitFlags(nil)

	// Make sure klog (used by the client-go dependency) logs to stderr, as it
	// will try to log to directories that may not exist in the cilium-operator
	// container (/tmp) and cause the cilium-operator to exit.
	flag.Set("logtostderr", "true")
}
// initConfig is the hook registered through cobra.OnInitialize. If the
// "version" setting is true it prints the Cilium version and exits with
// status 0. Otherwise it copies the cluster, namespace, CiliumEndpoint-CRD and
// AWS settings from viper into option.Config, then sets viper's environment
// prefix and config name.
func initConfig() {
	if showVersion := viper.GetBool("version"); showVersion {
		fmt.Fprintf(os.Stdout, "Cilium %s\n", version.Version)
		os.Exit(0)
	}

	// Cluster identity.
	option.Config.ClusterID = viper.GetInt(option.ClusterIDName)
	option.Config.ClusterName = viper.GetString(option.ClusterName)

	// Deployment details.
	option.Config.K8sNamespace = viper.GetString(option.K8sNamespaceName)
	option.Config.DisableCiliumEndpointCRD = viper.GetBool(option.DisableCiliumEndpointCRDName)
	option.Config.AwsReleaseExcessIps = viper.GetBool(option.AwsReleaseExcessIps)

	viper.SetEnvPrefix("cilium")
	viper.SetConfigName("cilium-operator")
}
// kvstoreEnabled reports whether the operator needs a kvstore connection.
// It is false when no kvstore type is configured. Otherwise it is true when
// identities are allocated through the kvstore, or when service or node
// synchronization is turned on.
func kvstoreEnabled() bool {
	switch {
	case kvStore == "":
		return false
	case identityAllocationMode == option.IdentityAllocationModeKVstore:
		return true
	default:
		return synchronizeServices || synchronizeNodes
	}
}
// getAPIServerAddr returns the two loopback listen addresses (IPv4 first,
// then IPv6) for the operator API, both using apiServerPort.
func getAPIServerAddr() []string {
	loopbacks := [...]string{"127.0.0.1", "[::1]"}

	addrs := make([]string, 0, len(loopbacks))
	for _, host := range loopbacks {
		addrs = append(addrs, fmt.Sprintf("%s:%d", host, apiServerPort))
	}
	return addrs
}
// runOperator is the body of the root command. In order, it:
//
//   - sets up logging and, if --cmdref was given, generates the command
//     reference and exits;
//   - starts the API server goroutine and, when enabled, registers metrics;
//   - connects to Kubernetes and verifies the minimal supported version;
//   - starts the optional controllers (unmanaged kube-dns, ENI allocator,
//     CiliumNode sync, kvstore-backed sync, identity and endpoint GC, CNP
//     watcher);
//   - blocks until shutdownSignal is closed.
//
// Unrecoverable setup errors terminate the process through log.Fatal.
func runOperator(cmd *cobra.Command) {
	logging.SetupLogging([]string{}, map[string]string{}, "cilium-operator", viper.GetBool("debug"))

	if cmdRefDir != "" {
		// Remove the line 'Auto generated by spf13/cobra on ...'
		cmd.DisableAutoGenTag = true
		if err := doc.GenMarkdownTreeCustom(cmd, cmdRefDir, filePrepend, linkHandler); err != nil {
			log.Fatal(err)
		}
		os.Exit(0)
	}

	log.Infof("Cilium Operator %s", version.Version)

	// k8sInitDone is closed below once k8s.Init has succeeded; the API server
	// goroutine receives it together with the shutdown channel.
	k8sInitDone := make(chan struct{})
	go startServer(shutdownSignal, k8sInitDone, getAPIServerAddr()...)

	if enableMetrics {
		registerMetrics()
	}

	k8sClientQPSLimit := viper.GetFloat64(option.K8sClientQPSLimit)
	k8sClientBurst := viper.GetInt(option.K8sClientBurst)
	kvStore = viper.GetString(option.KVStore)
	// Only override the flag-populated options when viper actually has some.
	if m := viper.GetStringMapString(option.KVStoreOpt); len(m) > 0 {
		kvStoreOpts = m
	}

	k8s.Configure(k8sAPIServer, k8sKubeConfigPath, float32(k8sClientQPSLimit), k8sClientBurst)
	if err := k8s.Init(); err != nil {
		log.WithError(err).Fatal("Unable to connect to Kubernetes apiserver")
	}
	close(k8sInitDone)

	ciliumK8sClient = k8s.CiliumClient()
	k8sversion.Update(k8s.Client())
	if !k8sversion.Capabilities().MinimalVersionMet {
		log.Fatalf("Minimal kubernetes version not met: %s < %s",
			k8sversion.Version(), k8sversion.MinimalVersionConstraint)
	}

	// Restart kube-dns as soon as possible since it helps etcd-operator to be
	// properly setup. If kube-dns is not managed by Cilium it can prevent
	// etcd from reaching out kube-dns in EKS.
	if option.Config.DisableCiliumEndpointCRD {
		log.Infof("KubeDNS unmanaged pods controller disabled as %q option is set to 'disabled' in Cilium ConfigMap", option.DisableCiliumEndpointCRDName)
	} else if unmanagedKubeDnsWatcherInterval != 0 {
		enableUnmanagedKubeDNSController()
	}

	enableENI = viper.GetString(option.IPAM) == option.IPAMENI
	if enableENI {
		awsClientQPSLimit := viper.GetFloat64(option.AWSClientQPSLimit)
		awsClientBurst := viper.GetInt(option.AWSClientBurst)
		if err := startENIAllocator(awsClientQPSLimit, awsClientBurst); err != nil {
			log.WithError(err).Fatal("Unable to start ENI allocator")
		}
		startSynchronizingCiliumNodes()
	}

	if kvstoreEnabled() {
		if synchronizeServices {
			startSynchronizingServices()
		}

		// goopts stays nil unless automatic etcd service translation is set
		// up below.
		var goopts *kvstore.ExtraOptions
		scopedLog := log.WithFields(logrus.Fields{
			"kvstore": kvStore,
			"address": kvStoreOpts[fmt.Sprintf("%s.address", kvStore)],
		})

		if synchronizeServices {
			// If K8s is enabled we can do the service translation automagically by
			// looking at services from k8s and retrieve the service IP from that.
			// This makes cilium to not depend on kube dns to interact with etcd
			if k8s.IsEnabled() {
				svcURL, isETCDOperator := kvstore.IsEtcdOperator(kvStore, kvStoreOpts, option.Config.K8sNamespace)
				if isETCDOperator {
					scopedLog.Info("cilium-operator running with service synchronization: automatic etcd service translation enabled")

					svcGetter := k8s.ServiceIPGetter(&k8sSvcCache)

					name, namespace, err := kvstore.SplitK8sServiceURL(svcURL)
					if err != nil {
						// If we couldn't derive the name/namespace for the given
						// svcURL log the error so the user can see it.
						// k8s.CreateCustomDialer won't be able to derive
						// the name/namespace as well so it does not matter that
						// we wait for all services to be synchronized with k8s.
						scopedLog.WithError(err).WithFields(logrus.Fields{
							"url": svcURL,
						}).Error("Unable to derive service name from given url")
					} else {
						scopedLog.WithFields(logrus.Fields{
							logfields.ServiceName:      name,
							logfields.ServiceNamespace: namespace,
						}).Info("Retrieving service spec from k8s to perform automatic etcd service translation")
						k8sSvc, err := k8s.Client().CoreV1().Services(namespace).Get(name, metav1.GetOptions{})
						switch {
						case err == nil:
							// Create another service cache that contains the
							// k8s service for etcd. As soon the k8s caches are
							// synced, this hijack will stop happening.
							sc := k8s.NewServiceCache()
							sc.UpdateService(&types.Service{Service: k8sSvc})
							svcGetter = &serviceGetter{
								shortCutK8sCache: &sc,
								k8sCache:         &k8sSvcCache,
							}
						case errors.IsNotFound(err):
							scopedLog.Error("Service not found in k8s")
						default:
							// Attach the cause so the operator can tell why the
							// lookup failed.
							scopedLog.WithError(err).Warning("Unable to get service spec from k8s, this might cause network disruptions with etcd")
						}
					}

					// Dedicated logger for the dialer; a distinct name avoids
					// shadowing the package-level logger.
					etcdLog := log.WithField(logfields.LogSubsys, "etcd")
					goopts = &kvstore.ExtraOptions{
						DialOption: []grpc.DialOption{
							grpc.WithDialer(k8s.CreateCustomDialer(svcGetter, etcdLog)),
						},
					}
				}
			}
		} else {
			scopedLog.Info("cilium-operator running without service synchronization: automatic etcd service translation disabled")
		}

		scopedLog.Info("Connecting to kvstore...")
		if err := kvstore.Setup(kvStore, kvStoreOpts, goopts); err != nil {
			scopedLog.WithError(err).Fatal("Unable to setup kvstore")
		}

		if synchronizeNodes {
			// A node watcher failure is logged but does not stop the operator.
			if err := runNodeWatcher(); err != nil {
				log.WithError(err).Error("Unable to setup node watcher")
			}
		}

		startKvstoreWatchdog()
	}

	if identityAllocationMode == option.IdentityAllocationModeCRD {
		if !k8s.IsEnabled() {
			log.Fatal("CRD Identity allocation mode requires k8s to be configured.")
		}

		startManagingK8sIdentities()

		if identityGCInterval != time.Duration(0) {
			go startCRDIdentityGC()
		}
	}

	if enableCepGC {
		enableCiliumEndpointSyncGC()
	}

	if identityGCInterval != time.Duration(0) {
		startIdentityGC()
	}

	if err := enableCNPWatcher(); err != nil {
		log.WithError(err).WithField("subsys", "CNPWatcher").Fatal(
			"Cannot connect to Kubernetes apiserver ")
	}

	log.Info("Initialization complete")

	// Block until the signal handler in main closes the channel.
	<-shutdownSignal
	// graceful exit
	log.Info("Received termination signal. Shutting down")
}