/
incrementality.go
596 lines (555 loc) · 20 KB
/
incrementality.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
// Utilities to help with incremental builds.
//
// There are four things we consider for each rule:
// - the global config, some parts of which affect all rules
// - the rule definition itself (the command to run, etc)
// - any input files it might have
// - any dependencies.
//
// If all of those are the same as the last time the rule was run,
// we can safely assume that the output will be the same this time
// and so we don't have to re-run it again.
package build
import (
"bytes"
"crypto/sha1"
"encoding/base64"
"encoding/gob"
"fmt"
"hash"
"io"
"io/ioutil"
"os"
"path"
"path/filepath"
"sort"
"strings"
"sync"
"core"
"fs"
)
// hashLength is the size of a single hash; everything here is SHA-1 based.
const hashLength = sha1.Size

// Length of the hash file we write: five hashLength segments, in order:
// rule hash, post-build rule hash, config hash, source hash, secret hash.
const hashFileLength = 5 * hashLength

// Length of old hash files that don't include secrets.
// Because that's basically everything we're going to keep compatibility for a while.
const oldHashFileLength = 4 * hashLength

// noSecrets is the thing we write when a rule doesn't have any secrets defined.
// It's exactly hashLength bytes of ASCII '-' so it fits the fixed-width hash file.
var noSecrets = []byte{45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45}

// Used to write something when we need to indicate a boolean in a hash. Can be essentially
// any value as long as they're different from one another.
var boolTrueHashValue = []byte{2}
var boolFalseHashValue = []byte{1}
// needsBuilding returns true if the rule needs building, false if the existing outputs are OK.
// postBuild selects whether we compare against the post-build version of the rule hash.
func needsBuilding(state *core.BuildState, target *core.BuildTarget, postBuild bool) bool {
	// Check the dependencies first, because they don't need any disk I/O.
	if target.NeedsTransitiveDependencies {
		if anyDependencyHasChanged(target) {
			return true // one of the transitive deps has changed, need to rebuild
		}
	} else {
		for _, dep := range target.Dependencies() {
			if dep.State() < core.Unchanged {
				log.Debug("Need to rebuild %s, %s has changed", target.Label, dep.Label)
				return true // dependency has just been rebuilt, do this too.
			}
		}
	}
	// Compare each stored hash from the last build against the freshly calculated one,
	// cheapest comparisons first.
	oldRuleHash, oldConfigHash, oldSourceHash, oldSecretHash := readRuleHashFile(ruleHashFileName(target), postBuild)
	if !bytes.Equal(oldConfigHash, state.Hashes.Config) {
		if len(oldConfigHash) == 0 {
			// Small nicety to make it a bit clearer what's going on.
			log.Debug("Need to build %s, outputs aren't there", target.Label)
		} else {
			log.Debug("Need to rebuild %s, config has changed (was %s, need %s)", target.Label, b64(oldConfigHash), b64(state.Hashes.Config))
		}
		return true
	}
	newRuleHash := RuleHash(state, target, false, postBuild)
	if !bytes.Equal(oldRuleHash, newRuleHash) {
		log.Debug("Need to rebuild %s, rule has changed (was %s, need %s)", target.Label, b64(oldRuleHash), b64(newRuleHash))
		return true
	}
	// NB. an error calculating the source or secret hash also forces a rebuild.
	newSourceHash, err := sourceHash(state, target)
	if err != nil || !bytes.Equal(oldSourceHash, newSourceHash) {
		log.Debug("Need to rebuild %s, sources have changed (was %s, need %s)", target.Label, b64(oldSourceHash), b64(newSourceHash))
		return true
	}
	newSecretHash, err := secretHash(target)
	if err != nil || !bytes.Equal(oldSecretHash, newSecretHash) {
		log.Debug("Need to rebuild %s, secrets have changed (was %s, need %s)", target.Label, b64(oldSecretHash), b64(newSecretHash))
		return true
	}
	// Check the outputs of this rule exist. This would only happen if the user had
	// removed them but it's incredibly aggravating if you remove an output and the
	// rule won't rebuild itself.
	for _, output := range target.Outputs() {
		realOutput := path.Join(target.OutDir(), output)
		if !core.PathExists(realOutput) {
			log.Debug("Output %s doesn't exist for rule %s; will rebuild.", realOutput, target.Label)
			return true
		}
	}
	// Maybe we've forced a rebuild. Do this last; might be interesting to see if it needed building anyway.
	return state.ForceRebuild && (state.IsOriginalTarget(target.Label) || state.IsOriginalTarget(target.Label.Parent()))
}
// b64 base64 encodes a string of bytes for printing.
// Empty input is rendered as a human-readable placeholder.
func b64(b []byte) string {
	if len(b) != 0 {
		return base64.RawStdEncoding.EncodeToString(b)
	}
	return "<not found>"
}
// anyDependencyHasChanged returns true if any transitive dependency of this target has changed.
func anyDependencyHasChanged(target *core.BuildTarget) bool {
	visited := map[core.BuildLabel]bool{}
	var changed func(*core.BuildTarget) bool
	changed = func(current *core.BuildTarget) bool {
		visited[current.Label] = true
		if current != target && current.State() < core.Unchanged {
			return true
		}
		// Only descend through the target itself or deps whose outputs aren't complete.
		if current == target || !current.OutputIsComplete {
			for _, dep := range current.Dependencies() {
				if visited[dep.Label] {
					continue
				}
				if changed(dep) {
					log.Debug("Need to rebuild %s, %s has changed", target.Label, dep.Label)
					return true
				}
			}
		}
		return false
	}
	return changed(target)
}
// mustSourceHash is like sourceHash but dies on any error.
func mustSourceHash(state *core.BuildState, target *core.BuildTarget) []byte {
	hash, err := sourceHash(state, target)
	if err != nil {
		log.Fatalf("%s", err)
	}
	return hash
}
// sourceHash calculates the hash of all sources of this rule, including its tools.
func sourceHash(state *core.BuildState, target *core.BuildTarget) ([]byte, error) {
	hasher := sha1.New()
	// Each source contributes its content hash plus its path, so renames are detected.
	for src := range core.IterSources(state.Graph, target) {
		contentHash, err := pathHash(src.Src, false)
		if err != nil {
			return nil, err
		}
		hasher.Write(contentHash)
		hasher.Write([]byte(src.Src))
	}
	for _, tool := range target.AllTools() {
		label := tool.Label()
		if label == nil {
			// Tool is a plain file on disk; hash it directly.
			toolHash, err := pathHash(tool.FullPaths(state.Graph)[0], false)
			if err != nil {
				return nil, err
			}
			hasher.Write(toolHash)
			continue
		}
		// Note that really it would be more correct to hash the outputs of these rules
		// in the same way we calculate a hash of sources for the rule, but that is
		// impractical for some cases (notably npm) where tools can be very large.
		// Instead we assume calculating the target hash is sufficient.
		hasher.Write(mustTargetHash(state, state.Graph.TargetOrDie(*label)))
	}
	return hasher.Sum(nil), nil
}
// pathHashMemoizer memoizes the results of pathHash so we don't hash the same files multiple times.
// Keys are repo-relative paths (see ensureRelative).
var pathHashMemoizer = map[string][]byte{}

// pathHashMutex guards pathHashMemoizer; of course it will be accessed concurrently.
var pathHashMutex sync.RWMutex
// pathHash calculates the hash of a single path which might be a file or a directory.
// This is the memoized form that only hashes each path once, unless recalc is true in which
// case it will force a recalculation of the hash.
func pathHash(path string, recalc bool) ([]byte, error) {
	path = ensureRelative(path)
	if !recalc {
		pathHashMutex.RLock()
		hash, ok := pathHashMemoizer[path]
		pathHashMutex.RUnlock()
		if ok {
			return hash, nil
		}
	}
	hash, err := pathHashImpl(path)
	if err != nil {
		// Don't cache failures; a later call may succeed.
		return hash, err
	}
	pathHashMutex.Lock()
	pathHashMemoizer[path] = hash
	pathHashMutex.Unlock()
	return hash, nil
}
// mustPathHash is like pathHash but panics if the path can't be hashed.
func mustPathHash(path string) []byte {
	b, err := pathHash(path, false)
	if err != nil {
		panic(err)
	}
	return b
}
// pathHashImpl does the real hashing work for a path, which may be a symlink,
// a directory (walked recursively) or a regular file.
func pathHashImpl(path string) ([]byte, error) {
	h := sha1.New()
	info, err := os.Lstat(path)
	if err == nil && info.Mode()&os.ModeSymlink != 0 {
		// Handle symlinks specially (don't attempt to read their contents).
		dest, err := os.Readlink(path)
		if err != nil {
			return nil, err
		}
		// Write something arbitrary indicating this is a symlink.
		// This isn't quite perfect - it could potentially get mixed up with a file with the
		// appropriate contents, but that is not really likely.
		h.Write(boolTrueHashValue)
		h.Write([]byte(dest))
		return h.Sum(nil), nil
	} else if err == nil && info.IsDir() {
		// Directory: fold every file beneath it into the hash.
		err = fs.WalkMode(path, func(p string, isDir bool, mode os.FileMode) error {
			if mode&os.ModeSymlink != 0 {
				// Is a symlink, must verify that it's not a link outside the tmp dir.
				deref, err := filepath.EvalSymlinks(p)
				if err != nil {
					return err
				}
				// NOTE(review): a bare prefix check can also match sibling paths that merely
				// share the prefix (e.g. "out" vs "out2") — confirm whether that's acceptable here.
				if !strings.HasPrefix(deref, path) {
					return fmt.Errorf("Output %s links outside the build dir (to %s)", p, deref)
				}
				// Deliberately do not attempt to read it. We will read the contents later since
				// it is a link within the temp dir anyway, and if it's a link to a directory
				// it can introduce a cycle.
				// Just write something to the hash indicating that we found something here,
				// otherwise rules might be marked as unchanged if they added additional symlinks.
				h.Write(boolTrueHashValue)
			} else if !isDir {
				return fileHash(&h, p)
			}
			return nil
		})
	} else {
		// Anything else (including a failed Lstat) is treated as a regular file.
		err = fileHash(&h, path) // let this handle any other errors
	}
	return h.Sum(nil), err
}
// movePathHash is used when we move files from tmp to out and there was one there before; that's
// the only case in which the hash of a filepath could change.
func movePathHash(oldPath, newPath string, copy bool) {
	oldPath, newPath = ensureRelative(oldPath), ensureRelative(newPath)
	pathHashMutex.Lock()
	defer pathHashMutex.Unlock()
	oldHash, present := pathHashMemoizer[oldPath]
	if !present {
		return
	}
	pathHashMemoizer[newPath] = oldHash
	// If the path is in plz-out/tmp we aren't ever going to use it again, so free some space.
	if !copy && strings.HasPrefix(oldPath, core.TmpDir) {
		delete(pathHashMemoizer, oldPath)
	}
}
// setPathHash is used to directly set a hash for a path.
// This is used for remote files where we download them & therefore know the hash as they come in.
// TODO(peterebden): We should probably use this more for things like caches and so forth...
func setPathHash(path string, hash []byte) {
	pathHashMutex.Lock()
	defer pathHashMutex.Unlock()
	pathHashMemoizer[path] = hash
}
// ensureRelative ensures a path is relative to the repo root.
// This is important for getting best performance from memoizing the path hashes.
func ensureRelative(path string) string {
	if !strings.HasPrefix(path, core.RepoRoot) {
		return path
	}
	return strings.TrimLeft(strings.TrimPrefix(path, core.RepoRoot), "/")
}
// Calculate the hash of a single file
func fileHash(h *hash.Hash, filename string) error {
file, err := os.Open(filename)
if err != nil {
return err
}
_, err = io.Copy(*h, file)
file.Close()
return err
}
// RuleHash calculates a hash for the relevant bits of this rule that affect its output.
// Optionally it can include parts of the rule that affect runtime (most obviously test-time).
// Note that we have to hash on the declared fields, we obviously can't hash pointers etc.
// incrementality_test will warn if new fields are added to the struct but not here.
func RuleHash(state *core.BuildState, target *core.BuildTarget, runtime, postBuild bool) []byte {
	if runtime || (postBuild && target.PostBuildFunction != nil) {
		// Runtime & post-build hashes are never cached on the target.
		return ruleHash(state, target, runtime)
	}
	// Non-post-build hashes get stored on the target itself.
	if len(target.RuleHash) == 0 {
		target.RuleHash = ruleHash(state, target, false) // This is never a runtime hash.
	}
	return target.RuleHash
}
// ruleHash is the uncached implementation behind RuleHash.
// The order and set of writes below define the hash; changing either invalidates
// every previously stored hash.
func ruleHash(state *core.BuildState, target *core.BuildTarget, runtime bool) []byte {
	h := sha1.New()
	h.Write([]byte(target.Label.String()))
	for _, dep := range target.DeclaredDependencies() {
		h.Write([]byte(dep.String()))
	}
	for _, vis := range target.Visibility {
		h.Write([]byte(vis.String())) // Doesn't strictly affect the output, but best to be safe.
	}
	for _, hsh := range target.Hashes {
		h.Write([]byte(hsh))
	}
	for _, source := range target.AllSources() {
		h.Write([]byte(source.String()))
	}
	for _, out := range target.DeclaredOutputs() {
		h.Write([]byte(out))
	}
	// Named outputs: each name followed by its outputs, in declared-name order.
	outs := target.DeclaredNamedOutputs()
	for _, name := range target.DeclaredOutputNames() {
		h.Write([]byte(name))
		for _, out := range outs[name] {
			h.Write([]byte(out))
		}
	}
	for _, licence := range target.Licences {
		h.Write([]byte(licence))
	}
	for _, output := range target.TestOutputs {
		h.Write([]byte(output))
	}
	for _, output := range target.OptionalOutputs {
		h.Write([]byte(output))
	}
	for _, label := range target.Labels {
		h.Write([]byte(label))
	}
	for _, secret := range target.Secrets {
		h.Write([]byte(secret))
	}
	hashBool(h, target.IsBinary)
	hashBool(h, target.IsTest)
	hashOptionalBool(h, target.Sandbox)
	// Note that we only hash the current command here; whatever's set in commands that we're not going
	// to run is uninteresting to us.
	h.Write([]byte(target.GetCommand(state)))
	if runtime {
		// Similarly, we only hash the current command here again.
		h.Write([]byte(target.GetTestCommand(state)))
		for _, datum := range target.Data {
			h.Write([]byte(datum.String()))
		}
		hashBool(h, target.Containerise)
		hashOptionalBool(h, target.TestSandbox)
		if target.ContainerSettings != nil {
			// gob gives us a deterministic serialised form of the settings to hash.
			e := gob.NewEncoder(h)
			if err := e.Encode(target.ContainerSettings); err != nil {
				panic(err)
			}
		}
		if target.Containerise {
			h.Write(state.Hashes.Containerisation)
		}
	}
	hashBool(h, target.NeedsTransitiveDependencies)
	hashBool(h, target.OutputIsComplete)
	// Should really not be conditional here, but we don't want adding the new flag to
	// change the hash of every single other target everywhere.
	// Might consider removing this the next time we peturb the hashing strategy.
	hashOptionalBool(h, target.Stamp)
	hashOptionalBool(h, target.IsFilegroup)
	hashOptionalBool(h, target.IsHashFilegroup)
	hashOptionalBool(h, target.IsRemoteFile)
	for _, require := range target.Requires {
		h.Write([]byte(require))
	}
	// Indeterminate iteration order, yay...
	languages := []string{}
	for k := range target.Provides {
		languages = append(languages, k)
	}
	sort.Strings(languages)
	for _, lang := range languages {
		h.Write([]byte(lang))
		h.Write([]byte(target.Provides[lang].String()))
	}
	// We don't need to hash the functions themselves because they get rerun every time -
	// we just need to check whether one is added or removed, which is good since it's
	// nigh impossible to really verify whether it's changed or not (since it may call
	// any amount of other stuff).
	hashBool(h, target.PreBuildFunction != nil)
	hashBool(h, target.PostBuildFunction != nil)
	return h.Sum(nil)
}
func hashBool(writer hash.Hash, b bool) {
if b {
writer.Write(boolTrueHashValue)
} else {
writer.Write(boolFalseHashValue)
}
}
// hashOptionalBool writes a marker only when b is true, so introducing a new
// false-valued flag leaves existing hashes unchanged.
func hashOptionalBool(writer hash.Hash, b bool) {
	if !b {
		return
	}
	hashBool(writer, true)
}
// readRuleHashFile reads the contents of a rule hash file into separate byte arrays.
// The file layout is five hashLength segments: rule hash, post-build rule hash,
// config hash, source hash and secret hash.
// Arrays will be empty if there's an error reading the file.
// If postBuild is true then the rule hash will be the post-build one if present.
func readRuleHashFile(filename string, postBuild bool) ([]byte, []byte, []byte, []byte) {
	contents := make([]byte, hashFileLength)
	file, err := os.Open(filename)
	if err != nil {
		if !os.IsNotExist(err) {
			log.Warning("Failed to read rule hash file %s: %s", filename, err)
		}
		return nil, nil, nil, nil
	}
	defer file.Close()
	// Use io.ReadFull rather than a single Read call; any io.Reader may legally return
	// a short read, which would otherwise be misdiagnosed as an old-format or corrupt file.
	if n, err := io.ReadFull(file, contents); err == io.EOF || err == io.ErrUnexpectedEOF {
		if n == oldHashFileLength {
			// Handle older hash files that don't have secrets in them.
			copy(contents[4*hashLength:hashFileLength], noSecrets)
		} else {
			log.Warning("Unexpected rule hash file length: expected %d bytes, was %d", hashFileLength, n)
			return nil, nil, nil, nil
		}
	} else if err != nil {
		log.Warning("Error reading rule hash file %s: %s", filename, err)
		return nil, nil, nil, nil
	}
	if postBuild {
		return contents[hashLength : 2*hashLength], contents[2*hashLength : 3*hashLength], contents[3*hashLength : 4*hashLength], contents[4*hashLength : hashFileLength]
	}
	return contents[0:hashLength], contents[2*hashLength : 3*hashLength], contents[3*hashLength : 4*hashLength], contents[4*hashLength : hashFileLength]
}
// writeRuleHashFile writes the rule hash file for a target: its target hash
// followed by its secret hash.
func writeRuleHashFile(state *core.BuildState, target *core.BuildTarget) error {
	targetHashes, err := targetHash(state, target)
	if err != nil {
		return err
	}
	secrets, err := secretHash(target)
	if err != nil {
		return err
	}
	file, err := os.Create(ruleHashFileName(target))
	if err != nil {
		return err
	}
	defer file.Close()
	n, err := file.Write(append(targetHashes, secrets...))
	if err != nil {
		return err
	}
	if n != hashFileLength {
		return fmt.Errorf("Wrote %d bytes to rule hash file; should be %d", n, hashFileLength)
	}
	return nil
}
// ruleHashFileName returns the filename we'll store the hashes for this target in.
func ruleHashFileName(target *core.BuildTarget) string {
	name := ".rule_hash_" + target.Label.Name
	return path.Join(target.OutDir(), name)
}
// postBuildOutputFileName returns the file in which a target's post-build output is stored.
func postBuildOutputFileName(target *core.BuildTarget) string {
	dir := target.OutDir()
	return path.Join(dir, target.PostBuildOutputFileName())
}
// loadPostBuildOutput retrieves a target's stored build output so it can be fed
// to its post-build function again.
func loadPostBuildOutput(state *core.BuildState, target *core.BuildTarget) (string, error) {
	// Normally filegroups don't have post-build functions, but we use this sometimes for testing.
	if target.IsFilegroup {
		return "", nil
	}
	b, err := ioutil.ReadFile(postBuildOutputFileName(target))
	if err != nil {
		return "", err
	}
	return string(b), nil
}
// storePostBuildOutput saves a target's build output for later replay to its
// post-build function; any failure is fatal.
func storePostBuildOutput(state *core.BuildState, target *core.BuildTarget, out []byte) {
	outputFile := postBuildOutputFileName(target)
	must := func(err error) {
		if err != nil {
			panic(err)
		}
	}
	must(os.RemoveAll(outputFile)) // clear any previous version first
	must(ioutil.WriteFile(outputFile, out, 0644))
}
// targetHash returns the hash for a target and any error encountered while calculating it.
// It concatenates the pre- and post-build rule hashes, the config hash and the source hash.
func targetHash(state *core.BuildState, target *core.BuildTarget) ([]byte, error) {
	hash := RuleHash(state, target, false, false)
	hash = append(hash, RuleHash(state, target, false, true)...)
	hash = append(hash, state.Hashes.Config...)
	srcHash, err := sourceHash(state, target)
	if err != nil {
		return nil, err
	}
	return append(hash, srcHash...), nil
}
// mustTargetHash returns the hash for a target and panics if it can't be calculated.
func mustTargetHash(state *core.BuildState, target *core.BuildTarget) []byte {
	b, err := targetHash(state, target)
	if err != nil {
		panic(err)
	}
	return b
}
// mustShortTargetHash returns the hash for a target, shortened to 1/4 length.
func mustShortTargetHash(state *core.BuildState, target *core.BuildTarget) []byte {
	full := mustTargetHash(state, target)
	return core.CollapseHash(full)
}
// RuntimeHash returns the target hash, source hash, config hash & runtime file hash,
// all rolled into one. Essentially this is one hash needed to determine if the runtime
// state is consistent.
func RuntimeHash(state *core.BuildState, target *core.BuildTarget) ([]byte, error) {
	hash := RuleHash(state, target, true, false)
	hash = append(hash, RuleHash(state, target, true, true)...)
	hash = append(hash, state.Hashes.Config...)
	srcHash, err := sourceHash(state, target)
	if err != nil {
		return nil, err
	}
	// Fold the source hash and all runtime files into a single final hash.
	hasher := sha1.New()
	hasher.Write(srcHash)
	for source := range core.IterRuntimeFiles(state.Graph, target, true) {
		runtimeHash, err := pathHash(source.Src, false)
		if err != nil {
			return runtimeHash, err
		}
		hasher.Write(runtimeHash)
	}
	return append(hash, hasher.Sum(nil)...), nil
}
// PrintHashes prints the various hashes for a target to stdout.
// It's used by plz hash --detailed to show a breakdown of the input hashes of a target.
func PrintHashes(state *core.BuildState, target *core.BuildTarget) {
	fmt.Printf("%s:\n", target.Label)
	fmt.Printf(" Config: %s\n", b64(state.Hashes.Config))
	fmt.Printf(" Rule: %s (pre-build)\n", b64(RuleHash(state, target, false, false)))
	fmt.Printf(" Rule: %s (post-build)\n", b64(RuleHash(state, target, false, true)))
	fmt.Printf(" Source: %s\n", b64(mustSourceHash(state, target)))
	// Note that the logic here mimics sourceHash, but I don't want to pollute that with
	// optional printing nonsense since it's on our hot path.
	for source := range core.IterSources(state.Graph, target) {
		fmt.Printf(" Source: %s: %s\n", source.Src, b64(mustPathHash(source.Src)))
	}
	for _, tool := range target.AllTools() {
		label := tool.Label()
		if label == nil {
			fmt.Printf(" Tool: %s: %s\n", tool, b64(mustPathHash(tool.FullPaths(state.Graph)[0])))
			continue
		}
		fmt.Printf(" Tool: %s: %s\n", *label, b64(mustShortTargetHash(state, state.Graph.TargetOrDie(*label))))
	}
}
// secretHash calculates a hash for any secrets of a target.
// Targets with no secrets (or whose secrets are absent on disk) get the noSecrets placeholder.
func secretHash(target *core.BuildTarget) ([]byte, error) {
	if len(target.Secrets) == 0 {
		return noSecrets, nil
	}
	h := sha1.New()
	for _, secret := range target.Secrets {
		switch ph, err := pathHash(secret, false); {
		case err == nil:
			h.Write(ph)
		case os.IsNotExist(err):
			return noSecrets, nil // Not having the secrets is not an error yet.
		default:
			return nil, err
		}
	}
	return h.Sum(nil), nil
}