alerting.go 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. package metrics
  import (
  	"sync"
  	"time"

  	log "github.com/sirupsen/logrus"
  )
  // Alerting constants.
  const (
  	// Utilization thresholds, in percent. A warning is logged when the
  	// recent average for a resource rises above its threshold.
  	maxCPUAlertingThresholdPCT  = 85
  	maxRAMAlertingThresholdPCT  = 85
  	maxDiskAlertingThresholdPCT = 90

  	// alertingError is the format string of the logged warning. It takes
  	// the resource name (%s) followed by the utilization value (%f).
  	alertingError = "The %s utilization of %f%% could cause problems with video generation and delivery. Visit the documentation at http://owncast.online/docs/troubleshooting/ if you are experiencing issues."
  )

  // Alerting state.
  var (
  	// Each flag is set when a warning is logged for that resource and is
  	// cleared again once errorResetDuration has passed. While a flag is
  	// set, further warnings for that resource are suppressed.
  	inCPUAlertingState  bool
  	inRAMAlertingState  bool
  	inDiskAlertingState bool

  	// errorResetDuration is how long a resource stays in the alerting
  	// state after a warning has been logged.
  	errorResetDuration = 5 * time.Minute
  )
  18. func handleAlerting() {
  19. handleCPUAlerting()
  20. handleRAMAlerting()
  21. handleDiskAlerting()
  22. }
  23. func handleCPUAlerting() {
  24. if len(metrics.CPUUtilizations) < 2 {
  25. return
  26. }
  27. avg := recentAverage(metrics.CPUUtilizations)
  28. if avg > maxCPUAlertingThresholdPCT && !inCPUAlertingState {
  29. log.Warnf(alertingError, "CPU", avg)
  30. inCPUAlertingState = true
  31. resetTimer := time.NewTimer(errorResetDuration)
  32. go func() {
  33. <-resetTimer.C
  34. inCPUAlertingState = false
  35. }()
  36. }
  37. }
  38. func handleRAMAlerting() {
  39. if len(metrics.RAMUtilizations) < 2 {
  40. return
  41. }
  42. avg := recentAverage(metrics.RAMUtilizations)
  43. if avg > maxRAMAlertingThresholdPCT && !inRAMAlertingState {
  44. log.Warnf(alertingError, "memory", avg)
  45. inRAMAlertingState = true
  46. resetTimer := time.NewTimer(errorResetDuration)
  47. go func() {
  48. <-resetTimer.C
  49. inRAMAlertingState = false
  50. }()
  51. }
  52. }
  53. func handleDiskAlerting() {
  54. if len(metrics.DiskUtilizations) < 2 {
  55. return
  56. }
  57. avg := recentAverage(metrics.DiskUtilizations)
  58. if avg > maxDiskAlertingThresholdPCT && !inDiskAlertingState {
  59. log.Warnf(alertingError, "disk", avg)
  60. inDiskAlertingState = true
  61. resetTimer := time.NewTimer(errorResetDuration)
  62. go func() {
  63. <-resetTimer.C
  64. inDiskAlertingState = false
  65. }()
  66. }
  67. }
  68. func recentAverage(values []TimestampedValue) float64 {
  69. return (values[len(values)-1].Value + values[len(values)-2].Value) / 2
  70. }