@@ -1581,15 +1581,28 @@ func (a *agent) manageProcessPriorityUntilGracefulShutdown() {
1581
1581
oomScore := unsetOOMScore
1582
1582
if scoreStr , ok := a .environmentVariables [EnvProcOOMScore ]; ok {
1583
1583
score , err := strconv .Atoi (strings .TrimSpace (scoreStr ))
1584
- if err == nil {
1584
+ if err == nil && score >= - 1000 && score <= 1000 {
1585
1585
oomScore = score
1586
+ } else {
1587
+ a .logger .Error (ctx , "invalid oom score" ,
1588
+ slog .F ("min_value" , - 1000 ),
1589
+ slog .F ("max_value" , 1000 ),
1590
+ slog .F ("value" , scoreStr ),
1591
+ )
1586
1592
}
1587
1593
}
1588
1594
1595
+ debouncer := & logDebouncer {
1596
+ logger : a .logger ,
1597
+ messages : map [string ]time.Time {},
1598
+ interval : time .Minute ,
1599
+ }
1600
+
1589
1601
for {
1590
- procs , err := a .manageProcessPriority (ctx , oomScore )
1602
+ procs , err := a .manageProcessPriority (ctx , debouncer , oomScore )
1603
+ // Avoid spamming the logs too often.
1591
1604
if err != nil {
1592
- a . logger .Error (ctx , "manage process priority" ,
1605
+ debouncer .Error (ctx , "manage process priority" ,
1593
1606
slog .Error (err ),
1594
1607
)
1595
1608
}
@@ -1605,13 +1618,16 @@ func (a *agent) manageProcessPriorityUntilGracefulShutdown() {
1605
1618
}
1606
1619
}
1607
1620
1621
+ // unsetOOMScore is a sentinel outside the valid OOM score range [-1000, 1000]; it marks the score as unset.
1608
1622
const unsetOOMScore = 1001
1609
1623
1610
- func (a * agent ) manageProcessPriority (ctx context.Context , oomScore int ) ([]* agentproc.Process , error ) {
1624
+ func (a * agent ) manageProcessPriority (ctx context.Context , debouncer * logDebouncer , oomScore int ) ([]* agentproc.Process , error ) {
1611
1625
const (
1612
1626
niceness = 10
1613
1627
)
1614
1628
1629
+ // We fetch the agent score each time because it's possible someone updates the
1630
+ // value after it is started.
1615
1631
agentScore , err := a .getAgentOOMScore ()
1616
1632
if err != nil {
1617
1633
agentScore = unsetOOMScore
@@ -1629,14 +1645,9 @@ func (a *agent) manageProcessPriority(ctx context.Context, oomScore int) ([]*age
1629
1645
1630
1646
var (
1631
1647
modProcs = []* agentproc.Process {}
1632
- logger slog.Logger
1633
1648
)
1634
1649
1635
1650
for _ , proc := range procs {
1636
- logger = a .logger .With (
1637
- slog .F ("cmd" , proc .Cmd ()),
1638
- slog .F ("pid" , proc .PID ),
1639
- )
1640
1651
1641
1652
containsFn := func (e string ) bool {
1642
1653
contains := strings .Contains (proc .Cmd (), e )
@@ -1651,7 +1662,9 @@ func (a *agent) manageProcessPriority(ctx context.Context, oomScore int) ([]*age
1651
1662
1652
1663
score , niceErr := proc .Niceness (a .syscaller )
1653
1664
if niceErr != nil && ! xerrors .Is (niceErr , os .ErrPermission ) {
1654
- logger .Warn (ctx , "unable to get proc niceness" ,
1665
+ debouncer .Warn (ctx , "unable to get proc niceness" ,
1666
+ slog .F ("cmd" , proc .Cmd ()),
1667
+ slog .F ("pid" , proc .PID ),
1655
1668
slog .Error (niceErr ),
1656
1669
)
1657
1670
continue
@@ -1662,27 +1675,31 @@ func (a *agent) manageProcessPriority(ctx context.Context, oomScore int) ([]*age
1662
1675
// Getpriority actually returns priority for the nice value
1663
1676
// which is niceness + 20, so here 20 = a niceness of 0 (aka unset).
1664
1677
if score != 20 {
1678
+ // We don't log here since it can get spammy
1665
1679
continue
1666
1680
}
1667
1681
1668
1682
if niceErr == nil {
1669
1683
err := proc .SetNiceness (a .syscaller , niceness )
1670
1684
if err != nil && ! xerrors .Is (err , os .ErrPermission ) {
1671
- logger .Warn (ctx , "unable to set proc niceness" ,
1685
+ debouncer .Warn (ctx , "unable to set proc niceness" ,
1686
+ slog .F ("cmd" , proc .Cmd ()),
1687
+ slog .F ("pid" , proc .PID ),
1672
1688
slog .F ("niceness" , niceness ),
1673
1689
slog .Error (err ),
1674
1690
)
1675
1691
}
1676
1692
}
1677
1693
1678
- // If the oom score is valid and it's not already set and isn't a custom value set by another process
1679
- // then it's ok to update it.
1694
+ // If the oom score is valid and it's not already set and isn't a custom value set by another process then it's ok to update it.
1680
1695
if oomScore != unsetOOMScore && oomScore != proc .OOMScoreAdj && ! isCustomOOMScore (agentScore , proc ) {
1681
1696
oomScoreStr := strconv .Itoa (oomScore )
1682
1697
err := afero .WriteFile (a .filesystem , fmt .Sprintf ("/proc/%d/oom_score_adj" , proc .PID ), []byte (oomScoreStr ), 0o644 )
1683
1698
if err != nil && ! xerrors .Is (err , os .ErrPermission ) {
1684
- logger .Warn (ctx , "unable to set oom_score_adj" ,
1685
- slog .F ("score" , "0" ),
1699
+ debouncer .Warn (ctx , "unable to set oom_score_adj" ,
1700
+ slog .F ("cmd" , proc .Cmd ()),
1701
+ slog .F ("pid" , proc .PID ),
1702
+ slog .F ("score" , oomScoreStr ),
1686
1703
slog .Error (err ),
1687
1704
)
1688
1705
}
@@ -2081,3 +2098,37 @@ func isCustomOOMScore(agentScore int, process *agentproc.Process) bool {
2081
2098
score := process .OOMScoreAdj
2082
2099
return agentScore != score && score != 1000 && score != 0 && score != 998
2083
2100
}
2101
+
2102
+ // logDebouncer prevents generating a log for a particular message if
2103
+ // it's been emitted within the given interval duration.
2104
+ // It's a shoddy implementation use in one spot that should be replaced at
2105
+ // some point.
2106
+ type logDebouncer struct {
2107
+ logger slog.Logger
2108
+ messages map [string ]time.Time
2109
+ interval time.Duration
2110
+ }
2111
+
2112
+ func (l * logDebouncer ) Warn (ctx context.Context , msg string , fields ... any ) {
2113
+ l .log (ctx , slog .LevelWarn , msg , fields ... )
2114
+ }
2115
+
2116
+ func (l * logDebouncer ) Error (ctx context.Context , msg string , fields ... any ) {
2117
+ l .log (ctx , slog .LevelError , msg , fields ... )
2118
+ }
2119
+
2120
+ func (l * logDebouncer ) log (ctx context.Context , level slog.Level , msg string , fields ... any ) {
2121
+ // This (bad) implementation assumes you wouldn't reuse the same msg
2122
+ // for different levels.
2123
+ last , ok := l .messages [msg ]
2124
+ if ok && time .Since (last ) < l .interval {
2125
+ return
2126
+ }
2127
+ switch level {
2128
+ case slog .LevelWarn :
2129
+ l .logger .Warn (ctx , msg , fields ... )
2130
+ case slog .LevelError :
2131
+ l .logger .Error (ctx , msg , fields ... )
2132
+ }
2133
+ l .messages [msg ] = time .Now ()
2134
+ }
0 commit comments