diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..11c3600
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+*.tpl linguist-language=go
\ No newline at end of file
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
new file mode 100644
index 0000000..4433626
--- /dev/null
+++ b/.github/workflows/lint.yaml
@@ -0,0 +1,35 @@
+# Lints the Helm chart and rules via `make lint` on pushes and pull requests targeting main.
+name: Lint
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          # Quoted so YAML keeps it a string; a bare 1.20 would be read as the float 1.2.
+          go-version: "1.23"
+
+      - name: Install Helm
+        uses: azure/setup-helm@v4
+        with:
+          version: v3.17.1
+
+      - name: Install yq
+        run: |
+          sudo wget https://github.com/mikefarah/yq/releases/download/v4.42.1/yq_linux_amd64 -O /usr/bin/yq &&\
+            sudo chmod +x /usr/bin/yq
+
+      - name: Lint Helm chart and rules
+        run: make lint
\ No newline at end of file
diff --git a/.github/workflows/nightly-build.yaml b/.github/workflows/nightly-build.yaml
new file mode 100644
index 0000000..e64f92c
--- /dev/null
+++ b/.github/workflows/nightly-build.yaml
@@ -0,0 +1,56 @@
+# Runs `make build` every night; on failure, opens an issue whose body is the build log.
+name: Nightly build
+
+on:
+  schedule:
+    - cron: '0 0 * * *'
+  workflow_dispatch: # Allows manual triggering of the workflow
+
+permissions:
+  contents: read
+  # Required by the "Create issue from file on failure" step.
+  issues: write
+
+jobs:
+  nightly-build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          # Quoted so YAML keeps it a string rather than parsing it as a float.
+          go-version: "1.22"
+
+      - name: Install Helm
+        uses: azure/setup-helm@v4
+        with:
+          version: v3.17.1
+
+      - name: Install yq
+        run: |
+          sudo wget https://github.com/mikefarah/yq/releases/download/v4.42.1/yq_linux_amd64 -O /usr/bin/yq &&\
+            sudo chmod +x /usr/bin/yq
+
+      - name: make build
+        run: |
+          make build > output.log 2>&1
+
+      # Must also run after a failed build, otherwise the log of the failure is never uploaded.
+      - name: Upload script output
+        if: ${{ !cancelled() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: script-output
+          path: output.log
+
+      - name: Create issue from file on failure
+        if: failure()
+        uses: peter-evans/create-issue-from-file@v5
+        with:
+          title: nightly build failure
+          content-filepath: output.log
+          assignees: dannykopping
\ No newline at end of file
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
new file mode 100644
index 0000000..23d45a6
--- /dev/null
+++ b/.github/workflows/release.yaml
@@ -0,0 +1,55 @@
+# GitHub release workflow: runs scripts/publish.sh whenever a v* tag is pushed.
+name: publish-helm
+on:
+  push:
+    tags:
+      - v*
+
+permissions:
+  # Required to publish a release
+  contents: write
+  # Necessary to push docker images to ghcr.io.
+  packages: write
+  # Necessary for GCP authentication (https://github.com/google-github-actions/setup-gcloud#usage)
+  id-token: write
+
+concurrency: ${{ github.workflow }}-${{ github.ref }}
+
+jobs:
+  release:
+    name: Build and publish
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # If the event that triggered the build was an annotated tag (which our
+      # tags are supposed to be), actions/checkout has a bug where the tag in
+      # question is only a lightweight tag and not a full annotated tag. This
+      # command seems to fix it.
+      # https://github.com/actions/checkout/issues/290
+      - name: Fetch git tags
+        run: git fetch --tags --force
+
+      - name: Authenticate to Google Cloud
+        uses: google-github-actions/auth@v2
+        with:
+          workload_identity_provider: projects/898976630798/locations/global/workloadIdentityPools/coder-ci/providers/github-actions
+          service_account: coder-observability@coder-customer-releases.iam.gserviceaccount.com
+
+      - name: Setup GCloud SDK
+        uses: "google-github-actions/setup-gcloud@v2"
+
+      - name: Install helm
+        uses: azure/setup-helm@v4
+        with:
+          # Same Helm release that the lint and nightly workflows validate the chart with.
+          version: v3.17.1
+
+      # This workflow is only triggered by tag pushes and declares no inputs,
+      # so the publish step runs unconditionally.
+      - name: Publish Helm Chart
+        run: |
+          ./scripts/publish.sh
diff --git a/.gitignore b/.gitignore
index ee3892e..949b791 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
charts/
+build/
+scratch
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..8bb9049
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,30 @@
+# CHANGELOG
+
+## v0.3.0
+
+- Adding prebuilt workspace dashboard & alerts
+
+## v0.2.1
+
+- Upgraded subcharts
+  - Loki: upgraded from v6.7.1 -> v6.7.3
+- FIX: `listen-address` duplicate removed in `prometheus-config-reloader`
+
+## v0.2.0
+
+- Upgraded subcharts
+ - Grafana: upgraded from v7.3.7 -> v7.3.12
+  - Prometheus: upgraded from v25.18.0 -> v25.24.1
+  - Loki: upgraded from v6.3.4 -> v6.7.1
+
+## v0.1.0
+
+- Lint Helm chart in CI
+
+## v0.0.2 -> v0.0.11
+
+- Several stability & configurability improvements
+
+## v0.0.1
+
+- Initial release
diff --git a/CODEOWNERS b/CODEOWNERS
new file mode 100644
index 0000000..4521512
--- /dev/null
+++ b/CODEOWNERS
@@ -0,0 +1 @@
+* @dannykopping
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
index 38bdde7..f7c5d7f 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,21 +1,121 @@
-MIT License
-
-Copyright (c) 2024 Coder
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+Creative Commons Legal Code
+
+CC0 1.0 Universal
+
+ CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
+ LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
+ ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
+ INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
+ REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
+ PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
+ THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
+ HEREUNDER.
+
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer
+exclusive Copyright and Related Rights (defined below) upon the creator
+and subsequent owner(s) (each and all, an "owner") of an original work of
+authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for
+the purpose of contributing to a commons of creative, cultural and
+scientific works ("Commons") that the public can reliably and without fear
+of later claims of infringement build upon, modify, incorporate in other
+works, reuse and redistribute as freely as possible in any form whatsoever
+and for any purposes, including without limitation commercial purposes.
+These owners may contribute to the Commons to promote the ideal of a free
+culture and the further production of creative, cultural and scientific
+works, or to gain reputation or greater distribution for their Work in
+part through the use and efforts of others.
+
+For these and/or other purposes and motivations, and without any
+expectation of additional consideration or compensation, the person
+associating CC0 with a Work (the "Affirmer"), to the extent that he or she
+is an owner of Copyright and Related Rights in the Work, voluntarily
+elects to apply CC0 to the Work and publicly distribute the Work under its
+terms, with knowledge of his or her Copyright and Related Rights in the
+Work and the meaning and intended legal effect of CC0 on those rights.
+
+1. Copyright and Related Rights. A Work made available under CC0 may be
+ protected by copyright and related or neighboring rights ("Copyright and
+ Related Rights"). Copyright and Related Rights include, but are not
+ limited to, the following:
+
+i. the right to reproduce, adapt, distribute, perform, display,
+communicate, and translate a Work;
+ii. moral rights retained by the original author(s) and/or performer(s);
+iii. publicity and privacy rights pertaining to a person's image or
+likeness depicted in a Work;
+iv. rights protecting against unfair competition in regards to a Work,
+subject to the limitations in paragraph 4(a), below;
+v. rights protecting the extraction, dissemination, use and reuse of data
+in a Work;
+vi. database rights (such as those arising under Directive 96/9/EC of the
+European Parliament and of the Council of 11 March 1996 on the legal
+protection of databases, and under any national implementation
+thereof, including any amended or successor version of such
+directive); and
+vii. other similar, equivalent or corresponding rights throughout the
+world based on applicable law or treaty, and any national
+implementations thereof.
+
+2. Waiver. To the greatest extent permitted by, but not in contravention
+ of, applicable law, Affirmer hereby overtly, fully, permanently,
+ irrevocably and unconditionally waives, abandons, and surrenders all of
+ Affirmer's Copyright and Related Rights and associated claims and causes
+ of action, whether now known or unknown (including existing as well as
+ future claims and causes of action), in the Work (i) in all territories
+ worldwide, (ii) for the maximum duration provided by applicable law or
+ treaty (including future time extensions), (iii) in any current or future
+ medium and for any number of copies, and (iv) for any purpose whatsoever,
+ including without limitation commercial, advertising or promotional
+ purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
+ member of the public at large and to the detriment of Affirmer's heirs and
+ successors, fully intending that such Waiver shall not be subject to
+ revocation, rescission, cancellation, termination, or any other legal or
+ equitable action to disrupt the quiet enjoyment of the Work by the public
+ as contemplated by Affirmer's express Statement of Purpose.
+
+3. Public License Fallback. Should any part of the Waiver for any reason
+ be judged legally invalid or ineffective under applicable law, then the
+ Waiver shall be preserved to the maximum extent permitted taking into
+ account Affirmer's express Statement of Purpose. In addition, to the
+ extent the Waiver is so judged Affirmer hereby grants to each affected
+ person a royalty-free, non transferable, non sublicensable, non exclusive,
+ irrevocable and unconditional license to exercise Affirmer's Copyright and
+ Related Rights in the Work (i) in all territories worldwide, (ii) for the
+ maximum duration provided by applicable law or treaty (including future
+ time extensions), (iii) in any current or future medium and for any number
+ of copies, and (iv) for any purpose whatsoever, including without
+ limitation commercial, advertising or promotional purposes (the
+ "License"). The License shall be deemed effective as of the date CC0 was
+ applied by Affirmer to the Work. Should any part of the License for any
+ reason be judged legally invalid or ineffective under applicable law, such
+ partial invalidity or ineffectiveness shall not invalidate the remainder
+ of the License, and in such case Affirmer hereby affirms that he or she
+ will not (i) exercise any of his or her remaining Copyright and Related
+ Rights in the Work or (ii) assert any associated claims and causes of
+ action with respect to the Work, in either case contrary to Affirmer's
+ express Statement of Purpose.
+
+4. Limitations and Disclaimers.
+
+a. No trademark or patent rights held by Affirmer are waived, abandoned,
+surrendered, licensed or otherwise affected by this document.
+b. Affirmer offers the Work as-is and makes no representations or
+warranties of any kind concerning the Work, express, implied,
+statutory or otherwise, including without limitation warranties of
+title, merchantability, fitness for a particular purpose, non
+infringement, or the absence of latent or other defects, accuracy, or
+the present or absence of errors, whether or not discoverable, all to
+the greatest extent permissible under applicable law.
+c. Affirmer disclaims responsibility for clearing rights of other persons
+that may apply to the Work or any use thereof, including without
+limitation any person's Copyright and Related Rights in the Work.
+Further, Affirmer disclaims responsibility for obtaining any necessary
+consents, permissions or other rights required for any use of the
+Work.
+d. Affirmer understands and acknowledges that Creative Commons is not a
+party to this document and has no duty or obligation with respect to
+this CC0 or use of the Work.
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..3973683
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,51 @@
+# Use a single bash shell for each job, and immediately exit on failure
+SHELL := bash
+.SHELLFLAGS = -ceu
+.ONESHELL:
+
+# This doesn't work on directories.
+# See https://stackoverflow.com/questions/25752543/make-delete-on-error-for-directory-targets
+.DELETE_ON_ERROR:
+
+all: lint
+.PHONY: all
+
+# Builds, lints the chart and rules, regenerates the README, then runs check-unstaged.sh.
+lint: build lint/helm lint/rules readme
+	./scripts/check-unstaged.sh
+.PHONY: lint
+
+lint/helm: lint/helm/coder-observability
+.PHONY: lint/helm
+
+# Strict-lints the chart with coder.image.tag set from scripts/version.sh.
+lint/helm/coder-observability:
+	helm lint --strict --set coder.image.tag=v$(shell ./scripts/version.sh) coder-observability/
+.PHONY: lint/helm/coder-observability
+
+build:
+	./scripts/compile.sh
+.PHONY: build
+
+lint/rules: lint/helm/prometheus-rules
+.PHONY: lint/rules
+
+lint/helm/prometheus-rules:
+	@./scripts/lint-rules.sh
+.PHONY: lint/helm/prometheus-rules
+
+# Usage: publish-patch, publish-minor, publish-major
+# Publishing is handled by GitHub Actions, triggered by tag creation.
+publish-%:
+	version=$(shell ./scripts/version.sh --bump $*) && \
+	git tag --sign "$$version" -m "Release: $$version" && \
+	git push origin tag "$$version"
+
+# Regenerates README.md from README.gotmpl with helm-docs.
+# Phony so that make never mistakes the existing README.md for this target on
+# case-insensitive filesystems and skips the recipe.
+readme:
+	go install github.com/norwoodj/helm-docs/cmd/helm-docs@latest
+	helm-docs --output-file ../README.md \
+		--values-file=values.yaml --chart-search-root=coder-observability --template-files=../README.gotmpl
+.PHONY: readme
\ No newline at end of file
diff --git a/PUBLISH.md b/PUBLISH.md
new file mode 100644
index 0000000..6828c04
--- /dev/null
+++ b/PUBLISH.md
@@ -0,0 +1,4 @@
+# Publishing the Coder Observability Chart
+
+- make desired changes
+- run `make publish-{major|minor|patch}` which creates & pushes a new tag, which kicks off a GH Action to publish the chart
\ No newline at end of file
diff --git a/README.gotmpl b/README.gotmpl
new file mode 100644
index 0000000..411d638
--- /dev/null
+++ b/README.gotmpl
@@ -0,0 +1,242 @@
+
+
+
+# Coder Observability Chart
+
+> [!NOTE]
+> This Helm chart is in BETA; use with caution
+
+## Overview
+
+This chart contains a highly opinionated set of integrations between Grafana, Loki, Prometheus, Alertmanager, and
+Grafana Agent.
+
+Dashboards, alerts, and runbooks are preconfigured for monitoring [Coder](https://coder.com/) installations.
+
+Out of the box:
+
+Metrics will be scraped from all pods which have a `prometheus.io/scrape=true` annotation.
+Logs will be scraped from all pods in the Kubernetes cluster.
+
+## Installation
+
+
+
+```bash
+helm repo add coder-observability https://helm.coder.com/observability
+helm upgrade --install coder-observability coder-observability/coder-observability --version 0.1.1 --namespace coder-observability --create-namespace
+```
+
+## Requirements
+
+### General
+
+- Helm 3.7+
+
+### Coder
+
+
+Kubernetes-based deployments
+ If your installation is not in a namespace named `coder`, you will need to modify:
+
+```yaml
+global:
+ coder:
+ controlPlaneNamespace:
+ externalProvisionersNamespace:
+```
+
+
+
+
+Non-Kubernetes deployments (click to expand)
+ Ensure your Coder installation is accessible to the resources created by this chart.
+
+Set `global.coder.scrapeMetrics` such that the metrics can be scraped from your installation, e.g.:
+
+```yaml
+global:
+ coder:
+ scrapeMetrics:
+ hostname: your.coder.host
+ port: 2112
+ scrapeInterval: 15s
+ additionalLabels:
+ job: coder
+```
+
+If you would like your logs scraped from a process outside Kubernetes, you need to mount the log file(s) in and
+configure Grafana Agent to scrape them; here's an example configuration:
+
+```yaml
+grafana-agent:
+ agent:
+ mounts:
+ extra:
+ - mountPath: /var/log
+ name: logs
+ readOnly: true
+ controller:
+ volumes:
+ extra:
+ - hostPath:
+ path: /var/log
+ name: logs
+
+ extraBlocks: |-
+ loki.source.file "coder_log" {
+ targets = [
+ {__path__ = "/var/log/coder.log", job="coder"},
+ ]
+ forward_to = [loki.write.loki.receiver]
+ }
+```
+
+
+
+Ensure these environment variables are set in your Coder deployment:
+
+- `CODER_PROMETHEUS_ENABLE=true`
+- `CODER_PROMETHEUS_COLLECT_AGENT_STATS=true`
+- `CODER_LOGGING_HUMAN=/dev/stderr` (only `human` log format is supported
+ currently; [issue](https://github.com/coder/observability/issues/8))
+
+Ensure these labels exist on your Coder & provisioner deployments:
+
+- `prometheus.io/scrape=true`
+- `prometheus.io/port=2112` (ensure this matches the port defined by `CODER_PROMETHEUS_ADDRESS`)
+
+If you use the [`coder/coder` helm chart](https://github.com/coder/coder/tree/main/helm), you can use the
+following:
+
+```yaml
+coder:
+ podAnnotations:
+ prometheus.io/scrape: "true"
+ prometheus.io/port: "2112"
+```
+
+For more details, see
+the [coder documentation on exposing Prometheus metrics](https://coder.com/docs/v2/latest/admin/prometheus).
+
+### Postgres
+
+You may configure the Helm chart to monitor your Coder deployment's Postgres server. Ensure that the resources created
+by this Helm chart can access your Postgres server.
+
+Create a secret with your Postgres password and reference it as follows, along with the other connection details:
+
+```yaml
+global:
+ postgres:
+ hostname:
+ port:
+ database:
+ username:
+ mountSecret:
+```
+
+The secret should be in the form of `PGPASSWORD=`, as this secret will be used to create an environment
+variable.
+
+```yaml
+apiVersion: v1
+kind: Secret
+metadata:
+ name: pg-secret
+ namespace: coder-observability
+data:
+ PGPASSWORD:
+```
+
+
+Postgres metrics (click to expand)
+
+A tool called [`postgres-exporter`](https://github.com/prometheus-community/postgres_exporter) is used to scrape metrics
+from your Postgres server, and you can see the metrics it is exposing as follows:
+
+```bash
+kubectl -n coder-observability port-forward statefulset/postgres-exporter 9187
+
+curl http://localhost:9187/metrics
+```
+
+
+
+### Grafana
+
+To access Grafana, run:
+
+```bash
+kubectl -n coder-observability port-forward svc/grafana 3000:80
+```
+
+And open your web browser to http://localhost:3000/.
+
+By default, Grafana is configured to allow anonymous access; if you want password authentication, define this in
+your `values.yaml`:
+
+```yaml
+grafana:
+ admin:
+ existingSecret: grafana-admin
+ userKey: username
+ passwordKey: password
+ grafana.ini:
+ auth.anonymous:
+ enabled: false
+```
+
+You will also need to define a secret as follows:
+
+```yaml
+apiVersion: v1
+kind: Secret
+metadata:
+ name: grafana-admin # this matches the "existingSecret" field above
+stringData:
+ username: "" # this matches the "userKey" field above
+ password: "" # this matches the "passwordKey" field above
+```
+
+To add an Ingress for Grafana, define this in your `values.yaml`:
+
+```yaml
+grafana:
+ grafana.ini:
+ server:
+ domain: observability.example.com
+ root_url: "%(protocol)s://%(domain)s/grafana"
+ serve_from_sub_path: true
+ ingress:
+ enabled: true
+ hosts:
+ - "observability.example.com"
+ path: "/"
+```
+
+## Subcharts
+
+{{ template "chart.requirementsTable" . }}
+
+Each subchart can be disabled by setting the `enabled` field to `false`.
+
+| Subchart | Setting |
+|-----------------|-------------------------|
+| `grafana` | `grafana.enabled` |
+| `grafana-agent` | `grafana-agent.enabled` |
+| `loki` | `loki.enabled` |
+| `prometheus` | `prometheus.enabled` |
+
+## Values
+
+The `global` values are the values which pertain to this chart, while the rest pertain to the subcharts.
+These values represent only the values _set_ in this chart. For the full list of available values, please see each
+subchart.
+
+For example, the `grafana.replicas` value is set by this chart by default, and is one of hundreds of available
+values which are defined [here](https://github.com/grafana/helm-charts/tree/main/charts/grafana#configuration).
+
+{{ template "chart.valuesTable" . }}
+
+{{ template "helm-docs.versionFooter" . }}
diff --git a/README.md b/README.md
index b15fcf1..1a80c26 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,492 @@
+
+
+
# Coder Observability Chart
-Requires Helm 3.7+
\ No newline at end of file
+> [!NOTE]
+> This Helm chart is in BETA; use with caution
+
+## Overview
+
+This chart contains a highly opinionated set of integrations between Grafana, Loki, Prometheus, Alertmanager, and
+Grafana Agent.
+
+Dashboards, alerts, and runbooks are preconfigured for monitoring [Coder](https://coder.com/) installations.
+
+Out of the box:
+
+Metrics will be scraped from all pods which have a `prometheus.io/scrape=true` annotation.
+Logs will be scraped from all pods in the Kubernetes cluster.
+
+## Installation
+
+
+
+```bash
+helm repo add coder-observability https://helm.coder.com/observability
+helm upgrade --install coder-observability coder-observability/coder-observability --version 0.1.1 --namespace coder-observability --create-namespace
+```
+
+## Requirements
+
+### General
+
+- Helm 3.7+
+
+### Coder
+
+
+Kubernetes-based deployments
+ If your installation is not in a namespace named `coder`, you will need to modify:
+
+```yaml
+global:
+ coder:
+ controlPlaneNamespace:
+ externalProvisionersNamespace:
+```
+
+
+
+
+Non-Kubernetes deployments (click to expand)
+ Ensure your Coder installation is accessible to the resources created by this chart.
+
+Set `global.coder.scrapeMetrics` such that the metrics can be scraped from your installation, e.g.:
+
+```yaml
+global:
+ coder:
+ scrapeMetrics:
+ hostname: your.coder.host
+ port: 2112
+ scrapeInterval: 15s
+ additionalLabels:
+ job: coder
+```
+
+If you would like your logs scraped from a process outside Kubernetes, you need to mount the log file(s) in and
+configure Grafana Agent to scrape them; here's an example configuration:
+
+```yaml
+grafana-agent:
+ agent:
+ mounts:
+ extra:
+ - mountPath: /var/log
+ name: logs
+ readOnly: true
+ controller:
+ volumes:
+ extra:
+ - hostPath:
+ path: /var/log
+ name: logs
+
+ extraBlocks: |-
+ loki.source.file "coder_log" {
+ targets = [
+ {__path__ = "/var/log/coder.log", job="coder"},
+ ]
+ forward_to = [loki.write.loki.receiver]
+ }
+```
+
+
+
+Ensure these environment variables are set in your Coder deployment:
+
+- `CODER_PROMETHEUS_ENABLE=true`
+- `CODER_PROMETHEUS_COLLECT_AGENT_STATS=true`
+- `CODER_LOGGING_HUMAN=/dev/stderr` (only `human` log format is supported
+ currently; [issue](https://github.com/coder/observability/issues/8))
+
+Ensure these labels exist on your Coder & provisioner deployments:
+
+- `prometheus.io/scrape=true`
+- `prometheus.io/port=2112` (ensure this matches the port defined by `CODER_PROMETHEUS_ADDRESS`)
+
+If you use the [`coder/coder` helm chart](https://github.com/coder/coder/tree/main/helm), you can use the
+following:
+
+```yaml
+coder:
+ podAnnotations:
+ prometheus.io/scrape: "true"
+ prometheus.io/port: "2112"
+```
+
+For more details, see
+the [coder documentation on exposing Prometheus metrics](https://coder.com/docs/v2/latest/admin/prometheus).
+
+### Postgres
+
+You may configure the Helm chart to monitor your Coder deployment's Postgres server. Ensure that the resources created
+by this Helm chart can access your Postgres server.
+
+Create a secret with your Postgres password and reference it as follows, along with the other connection details:
+
+```yaml
+global:
+ postgres:
+ hostname:
+ port:
+ database:
+ username:
+ mountSecret:
+```
+
+The secret should be in the form of `PGPASSWORD=`, as this secret will be used to create an environment
+variable.
+
+```yaml
+apiVersion: v1
+kind: Secret
+metadata:
+ name: pg-secret
+ namespace: coder-observability
+data:
+ PGPASSWORD:
+```
+
+
+Postgres metrics (click to expand)
+
+A tool called [`postgres-exporter`](https://github.com/prometheus-community/postgres_exporter) is used to scrape metrics
+from your Postgres server, and you can see the metrics it is exposing as follows:
+
+```bash
+kubectl -n coder-observability port-forward statefulset/postgres-exporter 9187
+
+curl http://localhost:9187/metrics
+```
+
+
+
+### Grafana
+
+To access Grafana, run:
+
+```bash
+kubectl -n coder-observability port-forward svc/grafana 3000:80
+```
+
+And open your web browser to http://localhost:3000/.
+
+By default, Grafana is configured to allow anonymous access; if you want password authentication, define this in
+your `values.yaml`:
+
+```yaml
+grafana:
+ admin:
+ existingSecret: grafana-admin
+ userKey: username
+ passwordKey: password
+ grafana.ini:
+ auth.anonymous:
+ enabled: false
+```
+
+You will also need to define a secret as follows:
+
+```yaml
+apiVersion: v1
+kind: Secret
+metadata:
+ name: grafana-admin # this matches the "existingSecret" field above
+stringData:
+ username: "" # this matches the "userKey" field above
+ password: "" # this matches the "passwordKey" field above
+```
+
+To add an Ingress for Grafana, define this in your `values.yaml`:
+
+```yaml
+grafana:
+ grafana.ini:
+ server:
+ domain: observability.example.com
+ root_url: "%(protocol)s://%(domain)s/grafana"
+ serve_from_sub_path: true
+ ingress:
+ enabled: true
+ hosts:
+ - "observability.example.com"
+ path: "/"
+```
+
+## Subcharts
+
+| Repository | Name | Version |
+|------------|------|---------|
+| https://grafana.github.io/helm-charts | grafana | ~v7.3.7 |
+| https://grafana.github.io/helm-charts | grafana-agent(grafana-agent) | ~0.37.0 |
+| https://grafana.github.io/helm-charts | loki | ~v6.7.3 |
+| https://prometheus-community.github.io/helm-charts | prometheus | ~v25.24.1 |
+
+Each subchart can be disabled by setting the `enabled` field to `false`.
+
+| Subchart | Setting |
+|-----------------|-------------------------|
+| `grafana` | `grafana.enabled` |
+| `grafana-agent` | `grafana-agent.enabled` |
+| `loki` | `loki.enabled` |
+| `prometheus` | `prometheus.enabled` |
+
+## Values
+
+The `global` values are the values which pertain to this chart, while the rest pertain to the subcharts.
+These values represent only the values _set_ in this chart. For the full list of available values, please see each
+subchart.
+
+For example, the `grafana.replicas` value is set by this chart by default, and is one of hundreds of available
+values which are defined [here](https://github.com/grafana/helm-charts/tree/main/charts/grafana#configuration).
+
+| Key | Type | Default | Description |
+|-----|------|---------|-------------|
+| global.coder.alerts | object | `{"coderd":{"groups":{"CPU":{"delay":"10m","enabled":true,"period":"10m","thresholds":{"critical":0.9,"warning":0.8}},"IneligiblePrebuilds":{"delay":"10m","enabled":true,"thresholds":{"notify":1}},"Memory":{"delay":"10m","enabled":true,"thresholds":{"critical":0.9,"warning":0.8}},"Replicas":{"delay":"5m","enabled":true,"thresholds":{"critical":1,"notify":3,"warning":2}},"Restarts":{"delay":"1m","enabled":true,"period":"10m","thresholds":{"critical":3,"notify":1,"warning":2}},"UnprovisionedPrebuiltWorkspaces":{"delay":"10m","enabled":true,"thresholds":{"warn":1}},"WorkspaceBuildFailures":{"delay":"10m","enabled":true,"period":"10m","thresholds":{"critical":10,"notify":2,"warning":5}}}},"enterprise":{"groups":{"Licences":{"delay":"1m","enabled":true,"thresholds":{"critical":1,"warning":0.9}}}},"provisionerd":{"groups":{"Replicas":{"delay":"5m","enabled":true,"thresholds":{"critical":1,"notify":3,"warning":2}}}}}` | alerts for the various aspects of Coder |
+| global.coder.coderdSelector | string | `"pod=~`coder.*`, pod!~`.*provisioner.*`"` | series selector for Prometheus/Loki to locate provisioner pods. ensure this uses backticks for quotes! |
+| global.coder.controlPlaneNamespace | string | `"coder"` | the namespace into which the control plane has been deployed. |
+| global.coder.externalProvisionersNamespace | string | `"coder"` | the namespace into which any external provisioners have been deployed. |
+| global.coder.logFormat | string | `"human"` | |
+| global.coder.provisionerdSelector | string | `"pod=~`coder-provisioner.*`"` | series selector for Prometheus/Loki to locate provisioner pods. https://coder.com/docs/v2/latest/admin/provisioners TODO: rename container label in provisioner helm chart to be "provisioner" not "coder" ensure this uses backticks for quotes! |
+| global.coder.scrapeMetrics | string | `nil` | use this to scrape metrics from a standalone (set of) coder deployment(s) if using kubernetes, rather add an annotation "prometheus.io/scrape=true" and coder will get automatically scraped; set this value to null and configure coderdSelector to target your coder pods |
+| global.coder.workspacesSelector | string | `"namespace=`coder-workspaces`"` | the namespace into which any external provisioners have been deployed. |
+| global.dashboards | object | `{"queryTimeout":900,"refresh":"30s","timerange":"12h"}` | settings for bundled dashboards |
+| global.dashboards.queryTimeout | int | `900` | how long until a query in Grafana will timeout after |
+| global.dashboards.refresh | string | `"30s"` | how often dashboards should refresh |
+| global.dashboards.timerange | string | `"12h"` | how far back dashboards should look |
+| global.externalScheme | string | `"http"` | |
+| global.externalZone | string | `"svc.cluster.local"` | |
+| global.postgres | object | `{"alerts":{"groups":{"Basic":{"delay":"1m","enabled":true},"Connections":{"delay":"5m","enabled":true,"thresholds":{"critical":0.9,"notify":0.5,"warning":0.8}},"Notifications":{"delay":"15m","enabled":true,"thresholds":{"critical":0.9,"notify":0.5,"warning":0.8}}}},"database":"coder","exporter":{"image":"quay.io/prometheuscommunity/postgres-exporter"},"hostname":"localhost","mountSecret":"secret-postgres","password":null,"port":5432,"sslmode":"disable","sslrootcert":null,"username":"coder","volumeMounts":[],"volumes":[]}` | postgres connection information NOTE: these settings are global so we can parameterise some values which get rendered by subcharts |
+| global.postgres.alerts | object | `{"groups":{"Basic":{"delay":"1m","enabled":true},"Connections":{"delay":"5m","enabled":true,"thresholds":{"critical":0.9,"notify":0.5,"warning":0.8}},"Notifications":{"delay":"15m","enabled":true,"thresholds":{"critical":0.9,"notify":0.5,"warning":0.8}}}}` | alerts for postgres |
+| global.telemetry | object | `{"metrics":{"scrape_interval":"15s","scrape_timeout":"12s"}}` | control telemetry collection |
+| global.telemetry.metrics | object | `{"scrape_interval":"15s","scrape_timeout":"12s"}` | control metric collection |
+| global.telemetry.metrics.scrape_interval | string | `"15s"` | how often the collector will scrape discovered pods |
+| global.telemetry.metrics.scrape_timeout | string | `"12s"` | how long a request will be allowed to wait before being canceled |
+| global.zone | string | `"svc"` | |
+| grafana-agent.agent.clustering.enabled | bool | `false` | |
+| grafana-agent.agent.configMap.create | bool | `false` | |
+| grafana-agent.agent.configMap.key | string | `"config.river"` | |
+| grafana-agent.agent.configMap.name | string | `"collector-config"` | |
+| grafana-agent.agent.extraArgs[0] | string | `"--disable-reporting=true"` | |
+| grafana-agent.agent.mode | string | `"flow"` | |
+| grafana-agent.agent.mounts.dockercontainers | bool | `true` | |
+| grafana-agent.agent.mounts.varlog | bool | `true` | |
+| grafana-agent.commonRelabellings | string | `"rule {\n source_labels = [\"__meta_kubernetes_namespace\"]\n target_label = \"namespace\"\n}\nrule {\n source_labels = [\"__meta_kubernetes_pod_name\"]\n target_label = \"pod\"\n}\n// coalesce the following labels and pick the first value; we'll use this to define the \"job\" label\nrule {\n source_labels = [\"__meta_kubernetes_pod_label_app_kubernetes_io_component\", \"app\", \"__meta_kubernetes_pod_container_name\"]\n separator = \"/\"\n target_label = \"__meta_app\"\n action = \"replace\"\n regex = \"^/*([^/]+?)(?:/.*)?$\" // split by the delimiter if it exists, we only want the first one\n replacement = \"${1}\"\n}\nrule {\n source_labels = [\"__meta_kubernetes_namespace\", \"__meta_kubernetes_pod_label_app_kubernetes_io_name\", \"__meta_app\"]\n separator = \"/\"\n target_label = \"job\"\n}\nrule {\n source_labels = [\"__meta_kubernetes_pod_container_name\"]\n target_label = \"container\"\n}\nrule {\n regex = \"__meta_kubernetes_pod_label_(statefulset_kubernetes_io_pod_name|controller_revision_hash)\"\n action = \"labeldrop\"\n}\nrule {\n regex = \"pod_template_generation\"\n action = \"labeldrop\"\n}\nrule {\n source_labels = [\"__meta_kubernetes_pod_phase\"]\n regex = \"Pending|Succeeded|Failed|Completed\"\n action = \"drop\"\n}\nrule {\n source_labels = [\"__meta_kubernetes_pod_node_name\"]\n action = \"replace\"\n target_label = \"node\"\n}\nrule {\n action = \"labelmap\"\n regex = \"__meta_kubernetes_pod_annotation_prometheus_io_param_(.+)\"\n replacement = \"__param_$1\"\n}"` | |
+| grafana-agent.controller.podAnnotations."prometheus.io/scrape" | string | `"true"` | |
+| grafana-agent.controller.type | string | `"daemonset"` | |
+| grafana-agent.crds.create | bool | `false` | |
+| grafana-agent.discovery | string | `"// Discover k8s nodes\ndiscovery.kubernetes \"nodes\" {\n role = \"node\"\n}\n\n// Discover k8s pods\ndiscovery.kubernetes \"pods\" {\n role = \"pod\"\n selectors {\n role = \"pod\"\n }\n}"` | |
+| grafana-agent.enabled | bool | `true` | |
+| grafana-agent.extraBlocks | string | `""` | |
+| grafana-agent.fullnameOverride | string | `"grafana-agent"` | |
+| grafana-agent.podLogsRelabelRules | string | `""` | |
+| grafana-agent.podMetricsRelabelRules | string | `""` | |
+| grafana-agent.withOTLPReceiver | bool | `false` | |
+| grafana."grafana.ini"."auth.anonymous".enabled | bool | `true` | |
+| grafana."grafana.ini"."auth.anonymous".org_name | string | `"Main Org."` | |
+| grafana."grafana.ini"."auth.anonymous".org_role | string | `"Admin"` | |
+| grafana."grafana.ini".analytics.reporting_enabled | bool | `false` | |
+| grafana."grafana.ini".dashboards.default_home_dashboard_path | string | `"/var/lib/grafana/dashboards/coder/0/status.json"` | |
+| grafana."grafana.ini".dataproxy.timeout | string | `"{{ $.Values.global.dashboards.queryTimeout }}"` | |
+| grafana."grafana.ini".feature_toggles.autoMigrateOldPanels | bool | `true` | |
+| grafana."grafana.ini".users.allow_sign_up | bool | `false` | |
+| grafana.admin.existingSecret | string | `""` | |
+| grafana.annotations."prometheus.io/scrape" | string | `"true"` | |
+| grafana.dashboardProviders."coder.yaml".apiVersion | int | `1` | |
+| grafana.dashboardProviders."coder.yaml".providers[0].disableDeletion | bool | `false` | |
+| grafana.dashboardProviders."coder.yaml".providers[0].editable | bool | `false` | |
+| grafana.dashboardProviders."coder.yaml".providers[0].folder | string | `"Coder"` | |
+| grafana.dashboardProviders."coder.yaml".providers[0].name | string | `"coder"` | |
+| grafana.dashboardProviders."coder.yaml".providers[0].options.path | string | `"/var/lib/grafana/dashboards/coder"` | |
+| grafana.dashboardProviders."coder.yaml".providers[0].orgId | int | `1` | |
+| grafana.dashboardProviders."coder.yaml".providers[0].type | string | `"file"` | |
+| grafana.dashboardProviders."coder.yaml".providers[0].updateIntervalSeconds | int | `5` | |
+| grafana.dashboardProviders."infra.yaml".apiVersion | int | `1` | |
+| grafana.dashboardProviders."infra.yaml".providers[0].disableDeletion | bool | `false` | |
+| grafana.dashboardProviders."infra.yaml".providers[0].editable | bool | `false` | |
+| grafana.dashboardProviders."infra.yaml".providers[0].folder | string | `"Infrastructure"` | |
+| grafana.dashboardProviders."infra.yaml".providers[0].name | string | `"infra"` | |
+| grafana.dashboardProviders."infra.yaml".providers[0].options.path | string | `"/var/lib/grafana/dashboards/infra"` | |
+| grafana.dashboardProviders."infra.yaml".providers[0].orgId | int | `1` | |
+| grafana.dashboardProviders."infra.yaml".providers[0].type | string | `"file"` | |
+| grafana.dashboardProviders."sidecar.yaml".apiVersion | int | `1` | |
+| grafana.dashboardProviders."sidecar.yaml".providers[0].disableDeletion | bool | `false` | |
+| grafana.dashboardProviders."sidecar.yaml".providers[0].editable | bool | `false` | |
+| grafana.dashboardProviders."sidecar.yaml".providers[0].folder | string | `"Other"` | |
+| grafana.dashboardProviders."sidecar.yaml".providers[0].name | string | `"sidecar"` | |
+| grafana.dashboardProviders."sidecar.yaml".providers[0].options.path | string | `"/tmp/dashboards"` | |
+| grafana.dashboardProviders."sidecar.yaml".providers[0].orgId | int | `1` | |
+| grafana.dashboardProviders."sidecar.yaml".providers[0].type | string | `"file"` | |
+| grafana.dashboardProviders."sidecar.yaml".providers[0].updateIntervalSeconds | int | `30` | |
+| grafana.dashboards.infra.node-exporter-full.datasource | string | `"metrics"` | |
+| grafana.dashboards.infra.node-exporter-full.gnetId | int | `1860` | |
+| grafana.dashboards.infra.node-exporter-full.revision | int | `36` | |
+| grafana.dashboards.infra.postgres-database.datasource | string | `"metrics"` | |
+| grafana.dashboards.infra.postgres-database.gnetId | int | `9628` | |
+| grafana.dashboards.infra.postgres-database.revision | int | `7` | |
+| grafana.datasources."datasources.yaml".apiVersion | int | `1` | |
+| grafana.datasources."datasources.yaml".datasources[0].access | string | `"proxy"` | |
+| grafana.datasources."datasources.yaml".datasources[0].editable | bool | `false` | |
+| grafana.datasources."datasources.yaml".datasources[0].isDefault | bool | `true` | |
+| grafana.datasources."datasources.yaml".datasources[0].name | string | `"metrics"` | |
+| grafana.datasources."datasources.yaml".datasources[0].timeout | string | `"{{ add $.Values.global.dashboards.queryTimeout 5 }}"` | |
+| grafana.datasources."datasources.yaml".datasources[0].type | string | `"prometheus"` | |
+| grafana.datasources."datasources.yaml".datasources[0].uid | string | `"prometheus"` | |
+| grafana.datasources."datasources.yaml".datasources[0].url | string | `"http://prometheus.{{ .Release.Namespace }}.{{ $.Values.global.zone }}"` | |
+| grafana.datasources."datasources.yaml".datasources[1].access | string | `"proxy"` | |
+| grafana.datasources."datasources.yaml".datasources[1].editable | bool | `false` | |
+| grafana.datasources."datasources.yaml".datasources[1].isDefault | bool | `false` | |
+| grafana.datasources."datasources.yaml".datasources[1].name | string | `"logs"` | |
+| grafana.datasources."datasources.yaml".datasources[1].timeout | string | `"{{ add $.Values.global.dashboards.queryTimeout 5 }}"` | |
+| grafana.datasources."datasources.yaml".datasources[1].type | string | `"loki"` | |
+| grafana.datasources."datasources.yaml".datasources[1].uid | string | `"loki"` | |
+| grafana.datasources."datasources.yaml".datasources[1].url | string | `"http://loki-gateway.{{ .Release.Namespace }}.{{ $.Values.global.zone }}"` | |
+| grafana.datasources."datasources.yaml".datasources[2].editable | bool | `false` | |
+| grafana.datasources."datasources.yaml".datasources[2].isDefault | bool | `false` | |
+| grafana.datasources."datasources.yaml".datasources[2].jsonData.sslmode | string | `"{{ .Values.global.postgres.sslmode }}"` | |
+| grafana.datasources."datasources.yaml".datasources[2].name | string | `"postgres"` | |
+| grafana.datasources."datasources.yaml".datasources[2].secureJsonData.password | string | `"{{ if .Values.global.postgres.password }}{{ .Values.global.postgres.password }}{{ else }}$PGPASSWORD{{ end }}"` | |
+| grafana.datasources."datasources.yaml".datasources[2].timeout | string | `"{{ add $.Values.global.dashboards.queryTimeout 5 }}"` | |
+| grafana.datasources."datasources.yaml".datasources[2].type | string | `"postgres"` | |
+| grafana.datasources."datasources.yaml".datasources[2].uid | string | `"postgres"` | |
+| grafana.datasources."datasources.yaml".datasources[2].url | string | `"{{ .Values.global.postgres.hostname }}:{{ .Values.global.postgres.port }}"` | |
+| grafana.datasources."datasources.yaml".datasources[2].user | string | `"{{ .Values.global.postgres.username }}"` | |
+| grafana.deploymentStrategy.type | string | `"Recreate"` | |
+| grafana.enabled | bool | `true` | |
+| grafana.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION | bool | `true` | |
+| grafana.extraConfigmapMounts[0].configMap | string | `"dashboards-status"` | |
+| grafana.extraConfigmapMounts[0].mountPath | string | `"/var/lib/grafana/dashboards/coder/0"` | |
+| grafana.extraConfigmapMounts[0].name | string | `"dashboards-status"` | |
+| grafana.extraConfigmapMounts[0].readOnly | bool | `false` | |
+| grafana.extraConfigmapMounts[1].configMap | string | `"dashboards-coderd"` | |
+| grafana.extraConfigmapMounts[1].mountPath | string | `"/var/lib/grafana/dashboards/coder/1"` | |
+| grafana.extraConfigmapMounts[1].name | string | `"dashboards-coderd"` | |
+| grafana.extraConfigmapMounts[1].readOnly | bool | `false` | |
+| grafana.extraConfigmapMounts[2].configMap | string | `"dashboards-provisionerd"` | |
+| grafana.extraConfigmapMounts[2].mountPath | string | `"/var/lib/grafana/dashboards/coder/2"` | |
+| grafana.extraConfigmapMounts[2].name | string | `"dashboards-provisionerd"` | |
+| grafana.extraConfigmapMounts[2].readOnly | bool | `false` | |
+| grafana.extraConfigmapMounts[3].configMap | string | `"dashboards-workspaces"` | |
+| grafana.extraConfigmapMounts[3].mountPath | string | `"/var/lib/grafana/dashboards/coder/3"` | |
+| grafana.extraConfigmapMounts[3].name | string | `"dashboards-workspaces"` | |
+| grafana.extraConfigmapMounts[3].readOnly | bool | `false` | |
+| grafana.extraConfigmapMounts[4].configMap | string | `"dashboards-workspace-detail"` | |
+| grafana.extraConfigmapMounts[4].mountPath | string | `"/var/lib/grafana/dashboards/coder/4"` | |
+| grafana.extraConfigmapMounts[4].name | string | `"dashboards-workspace-detail"` | |
+| grafana.extraConfigmapMounts[4].readOnly | bool | `false` | |
+| grafana.extraConfigmapMounts[5].configMap | string | `"dashboards-prebuilds"` | |
+| grafana.extraConfigmapMounts[5].mountPath | string | `"/var/lib/grafana/dashboards/coder/5"` | |
+| grafana.extraConfigmapMounts[5].name | string | `"dashboards-prebuilds"` | |
+| grafana.extraConfigmapMounts[5].readOnly | bool | `false` | |
+| grafana.fullnameOverride | string | `"grafana"` | |
+| grafana.image.tag | string | `"10.4.19"` | |
+| grafana.persistence.enabled | bool | `true` | |
+| grafana.persistence.size | string | `"10Gi"` | |
+| grafana.replicas | int | `1` | |
+| grafana.service.enabled | bool | `true` | |
+| grafana.sidecar.dashboards.enabled | bool | `false` | |
+| grafana.sidecar.dashboards.labelValue | string | `"1"` | |
+| grafana.sidecar.dashboards.provider.allowUiUpdates | bool | `true` | |
+| grafana.sidecar.dashboards.provider.disableDelete | bool | `true` | |
+| grafana.testFramework.enabled | bool | `false` | |
+| grafana.useStatefulSet | bool | `true` | |
+| loki.backend.extraArgs[0] | string | `"-log.level=debug"` | |
+| loki.backend.extraVolumeMounts[0].mountPath | string | `"/var/loki-ruler-wal"` | |
+| loki.backend.extraVolumeMounts[0].name | string | `"ruler-wal"` | |
+| loki.backend.extraVolumes[0].emptyDir | object | `{}` | |
+| loki.backend.extraVolumes[0].name | string | `"ruler-wal"` | |
+| loki.backend.podAnnotations."prometheus.io/scrape" | string | `"true"` | |
+| loki.backend.replicas | int | `1` | |
+| loki.chunksCache.allocatedMemory | int | `1024` | |
+| loki.enabled | bool | `true` | |
+| loki.enterprise.adminApi.enabled | bool | `false` | |
+| loki.enterprise.enabled | bool | `false` | |
+| loki.enterprise.useExternalLicense | bool | `false` | |
+| loki.fullnameOverride | string | `"loki"` | |
+| loki.gateway.replicas | int | `1` | |
+| loki.loki.auth_enabled | bool | `false` | |
+| loki.loki.commonConfig.path_prefix | string | `"/var/loki"` | |
+| loki.loki.commonConfig.replication_factor | int | `1` | |
+| loki.loki.rulerConfig.alertmanager_url | string | `"http://alertmanager.{{ .Release.Namespace }}.{{ .Values.global.zone}}"` | |
+| loki.loki.rulerConfig.enable_alertmanager_v2 | bool | `true` | |
+| loki.loki.rulerConfig.enable_api | bool | `true` | |
+| loki.loki.rulerConfig.remote_write.clients.fake.headers.Source | string | `"Loki"` | |
+| loki.loki.rulerConfig.remote_write.clients.fake.remote_timeout | string | `"30s"` | |
+| loki.loki.rulerConfig.remote_write.clients.fake.url | string | `"http://prometheus.{{ .Release.Namespace }}.{{ .Values.global.zone}}/api/v1/write"` | |
+| loki.loki.rulerConfig.remote_write.enabled | bool | `true` | |
+| loki.loki.rulerConfig.ring.kvstore.store | string | `"inmemory"` | |
+| loki.loki.rulerConfig.rule_path | string | `"/rules"` | |
+| loki.loki.rulerConfig.storage.local.directory | string | `"/rules"` | |
+| loki.loki.rulerConfig.storage.type | string | `"local"` | |
+| loki.loki.rulerConfig.wal.dir | string | `"/var/loki-ruler-wal"` | |
+| loki.loki.schemaConfig.configs[0].from | string | `"2024-04-01"` | |
+| loki.loki.schemaConfig.configs[0].index.period | string | `"24h"` | |
+| loki.loki.schemaConfig.configs[0].index.prefix | string | `"index_"` | |
+| loki.loki.schemaConfig.configs[0].object_store | string | `"s3"` | |
+| loki.loki.schemaConfig.configs[0].schema | string | `"v13"` | |
+| loki.loki.schemaConfig.configs[0].store | string | `"tsdb"` | |
+| loki.lokiCanary.annotations."prometheus.io/scrape" | string | `"true"` | |
+| loki.lokiCanary.enabled | bool | `true` | |
+| loki.minio.address | string | `"loki-storage.{{ .Release.Namespace }}.{{ .Values.global.zone}}:9000"` | |
+| loki.minio.enabled | bool | `true` | |
+| loki.minio.fullnameOverride | string | `"loki-storage"` | |
+| loki.minio.podAnnotations."prometheus.io/path" | string | `"/minio/v2/metrics/cluster"` | |
+| loki.minio.podAnnotations."prometheus.io/scrape" | string | `"true"` | |
+| loki.minio.podLabels."app.kubernetes.io/name" | string | `"loki-storage"` | |
+| loki.monitoring.dashboards.enabled | bool | `true` | |
+| loki.monitoring.selfMonitoring.enabled | bool | `false` | |
+| loki.monitoring.selfMonitoring.grafanaAgent.installOperator | bool | `false` | |
+| loki.nameOverride | string | `"loki"` | |
+| loki.read.podAnnotations."prometheus.io/scrape" | string | `"true"` | |
+| loki.read.replicas | int | `1` | |
+| loki.resultsCache.allocatedMemory | int | `1024` | |
+| loki.sidecar.rules.folder | string | `"/rules/fake"` | |
+| loki.sidecar.rules.logLevel | string | `"DEBUG"` | |
+| loki.test.canaryServiceAddress | string | `"http://loki-canary:3500/metrics"` | |
+| loki.test.enabled | bool | `true` | |
+| loki.write.extraArgs[0] | string | `"-log.level=debug"` | |
+| loki.write.podAnnotations."prometheus.io/scrape" | string | `"true"` | |
+| loki.write.replicas | int | `1` | |
+| prometheus.alertmanager.enabled | bool | `true` | |
+| prometheus.alertmanager.fullnameOverride | string | `"alertmanager"` | |
+| prometheus.alertmanager.podAnnotations."prometheus.io/scrape" | string | `"true"` | |
+| prometheus.alertmanager.service.port | int | `80` | |
+| prometheus.configmapReload.prometheus.containerPort | int | `9091` | |
+| prometheus.configmapReload.prometheus.extraArgs.log-level | string | `"all"` | |
+| prometheus.configmapReload.prometheus.extraArgs.watch-interval | string | `"15s"` | |
+| prometheus.configmapReload.prometheus.extraConfigmapMounts[0].configMap | string | `"metrics-alerts"` | |
+| prometheus.configmapReload.prometheus.extraConfigmapMounts[0].mountPath | string | `"/etc/config/alerts"` | |
+| prometheus.configmapReload.prometheus.extraConfigmapMounts[0].name | string | `"alerts"` | |
+| prometheus.configmapReload.prometheus.extraConfigmapMounts[0].readonly | bool | `true` | |
+| prometheus.enabled | bool | `true` | |
+| prometheus.kube-state-metrics.enabled | bool | `true` | |
+| prometheus.kube-state-metrics.fullnameOverride | string | `"kube-state-metrics"` | |
+| prometheus.kube-state-metrics.podAnnotations."prometheus.io/scrape" | string | `"true"` | |
+| prometheus.prometheus-node-exporter.enabled | bool | `true` | |
+| prometheus.prometheus-node-exporter.fullnameOverride | string | `"node-exporter"` | |
+| prometheus.prometheus-node-exporter.podAnnotations."prometheus.io/scrape" | string | `"true"` | |
+| prometheus.prometheus-pushgateway.enabled | bool | `false` | |
+| prometheus.server.extraArgs."log.level" | string | `"debug"` | |
+| prometheus.server.extraConfigmapMounts[0].configMap | string | `"metrics-alerts"` | |
+| prometheus.server.extraConfigmapMounts[0].mountPath | string | `"/etc/config/alerts"` | |
+| prometheus.server.extraConfigmapMounts[0].name | string | `"alerts"` | |
+| prometheus.server.extraConfigmapMounts[0].readonly | bool | `true` | |
+| prometheus.server.extraFlags[0] | string | `"web.enable-lifecycle"` | |
+| prometheus.server.extraFlags[1] | string | `"enable-feature=remote-write-receiver"` | |
+| prometheus.server.fullnameOverride | string | `"prometheus"` | |
+| prometheus.server.global.evaluation_interval | string | `"30s"` | |
+| prometheus.server.persistentVolume.enabled | bool | `true` | |
+| prometheus.server.persistentVolume.size | string | `"12Gi"` | |
+| prometheus.server.podAnnotations."prometheus.io/scrape" | string | `"true"` | |
+| prometheus.server.replicaCount | int | `1` | |
+| prometheus.server.retentionSize | string | `"10GB"` | |
+| prometheus.server.service.type | string | `"ClusterIP"` | |
+| prometheus.server.statefulSet.enabled | bool | `true` | |
+| prometheus.serverFiles."prometheus.yml".rule_files[0] | string | `"/etc/config/alerts/*.yaml"` | |
+| prometheus.serverFiles."prometheus.yml".scrape_configs | list | `[]` | |
+| prometheus.testFramework.enabled | bool | `false` | |
+| runbookViewer.image | string | `"dannyben/madness"` | |
+| sqlExporter.image | string | `"burningalchemist/sql_exporter"` | |
+
diff --git a/artifacthub-repo.yaml b/artifacthub-repo.yaml
new file mode 100644
index 0000000..dbbc45c
--- /dev/null
+++ b/artifacthub-repo.yaml
@@ -0,0 +1,8 @@
+# This file is uploaded to GCS at helm.coder.com/observability/artifacthub-repo.yml
+# and used by ArtifactHub to verify the repository.
+repositoryID: 167a0393-cb7e-4f42-af79-02f8a91915f5
+owners:
+ - name: colin
+ email: colin@coder.com
+ - name: Danny Kopping
+ email: danny@coder.com
\ No newline at end of file
diff --git a/coder-observability/Chart.lock b/coder-observability/Chart.lock
index c574782..1782a88 100644
--- a/coder-observability/Chart.lock
+++ b/coder-observability/Chart.lock
@@ -1,15 +1,15 @@
dependencies:
- name: grafana
repository: https://grafana.github.io/helm-charts
- version: 7.3.7
+ version: 7.3.12
- name: prometheus
repository: https://prometheus-community.github.io/helm-charts
- version: 25.18.0
+ version: 25.24.2
- name: loki
repository: https://grafana.github.io/helm-charts
- version: 6.3.4
+ version: 6.7.4
- name: grafana-agent
repository: https://grafana.github.io/helm-charts
version: 0.37.0
-digest: sha256:bf2593a78b3934ec78ffcd527947a64d8a7f223912a89d8f6c57ab8f4c4c12a1
-generated: "2024-04-24T14:44:26.109564+02:00"
+digest: sha256:05e0dae0200cabf5cb9e2cfb18a4e166fcaceefaf39827addff4299b18c31d4e
+generated: "2025-01-16T07:54:38.036598102Z"
diff --git a/coder-observability/Chart.yaml b/coder-observability/Chart.yaml
index be0d981..9e40bfa 100644
--- a/coder-observability/Chart.yaml
+++ b/coder-observability/Chart.yaml
@@ -4,28 +4,24 @@ description: Gain insights into your Coder deployment
type: application
version: 0.1.0
-appVersion: "2.9.2"
dependencies:
- name: grafana
- alias: dashboards
- condition: dashboards.enabled
+ condition: grafana.enabled
repository: https://grafana.github.io/helm-charts
- version: v7.3.7
+ version: '~v7.3.7'
- name: prometheus
- alias: metrics
- condition: metrics.enabled
+ condition: prometheus.enabled
repository: https://prometheus-community.github.io/helm-charts
- version: v25.18.0
+ version: '~v25.24.1'
- name: loki
- alias: logs
- condition: logs.enabled
+ condition: loki.enabled
repository: https://grafana.github.io/helm-charts
- version: v6.3.4
+ version: '~v6.7.3'
- name: grafana-agent
- alias: collector
- condition: collector.enabled
+ alias: grafana-agent
+ condition: grafana-agent.enabled
repository: https://grafana.github.io/helm-charts
- version: 0.37.0
+ version: '~0.37.0'
maintainers:
- name: Coder Technologies, Inc.
url: https://github.com/coder/observability/issues
@@ -37,4 +33,6 @@ keywords:
- cde
sources:
- https://github.com/coder/observability
-icon: https://helm.coder.com/coder_logo_black.png
\ No newline at end of file
+icon: https://helm.coder.com/coder_logo_black.png
+annotations:
+ artifacthub.io/category: monitoring-logging
\ No newline at end of file
diff --git a/coder-observability/runbooks/coderd.md b/coder-observability/runbooks/coderd.md
new file mode 100644
index 0000000..4a42444
--- /dev/null
+++ b/coder-observability/runbooks/coderd.md
@@ -0,0 +1,135 @@
+# Coderd Runbooks
+
+## CoderdCPUUsage
+
+The CPU usage of one or more Coder pods has been close to the limit defined for
+the deployment. This can cause slowness in the application, workspaces becoming
+unavailable, and may lead to the application failing its liveness probes and
+being restarted.
+
+To resolve this issue, increase the CPU limits of the Coder deployment.
+
+If you find this occurring frequently, you may wish to check your Coder
+deployment against [Coder's Reference Architectures](https://coder.com/docs/v2/latest/admin/architectures).
+
+## CoderdMemoryUsage
+
+The memory usage of one or more Coder pods has been close to the limit defined
+for the deployment. When the memory usage exceeds the limit, the pod(s) will be
+restarted by Kubernetes. This will interrupt all connections to workspaces being
+handled by the affected pod(s).
+
+To resolve this issue, increase the memory limits of the Coder deployment.
+
+If you find this occurring frequently, check the memory usage over a longer
+period of time. If it appears to be increasing monotonically, this is likely a
+memory leak and should be considered a bug.
+
+## CoderdRestarts
+
+One or more Coder pods have been restarting multiple times in the last 10
+minutes. This may be due to a number of issues, including:
+
+- Failure to connect to the configured database: Coder requires a reachable
+ PostgreSQL database to function. If it fails to connect, you will see an error
+ similar to the following:
+
+ ```console
+ [warn] ping postgres: retrying error="dial tcp 10.43.94.60:5432: connect: connection refused" try=3
+ ```
+
+- Out-Of-Memory (OOM) kills due to memory usage (see [above](#coderdmemoryusage)),
+- An unexpected bug causing the application to exit with an error.
+
+If Coder is not restarting due to excessive memory usage, check the logs:
+
+1. Check the logs of the deployment for any errors,
+
+```console
+kubectl -n <namespace> logs deployment/coder --previous
+```
+
+2. Check any Kubernetes events related to the deployment,
+
+```console
+kubectl -n <namespace> events --watch
+```
+
+## CoderdReplicas
+
+One or more Coderd replicas are down. This may cause availability problems and elevated
+response times for user and agent API calls.
+
+To resolve this issue, review the Coder deployment for possible `CrashLoopBackOff`
+instances or re-adjust alarm levels based on the actual number of replicas.
+
+## CoderdWorkspaceBuildFailures
+
+A few workspace build errors have been recently observed.
+
+Review Prometheus metrics to identify failed jobs. Check the workspace build logs
+to determine if there is a relationship with a new template version or a buggy
+Terraform plugin.
+
+## CoderdLicenseSeats
+
+Your Enterprise license is approaching or has exceeded the number of seats purchased.
+
+Please contact your Coder sales contact, or visit https://coder.com/contact/sales.
+
+## CoderdIneligiblePrebuilds
+
+Prebuilds only become eligible to be claimed by users once a) the workspace's agent is running and b) all of its startup
+scripts have completed.
+
+If a prebuilt workspace is not eligible, view its agent logs to diagnose the problem.
+
+## CoderdUnprovisionedPrebuiltWorkspaces
+
+The number of running prebuilt workspaces is lower than the desired instances. This could be for several reasons,
+ordered by likelihood:
+
+### Experiment/License
+
+The prebuilds feature is currently gated behind an experiment *and* a premium license.
+
+Ensure that the prebuilds experiment is enabled with `CODER_EXPERIMENTS=workspace-prebuilds`, and that you have a premium
+license added.
+
+### Preset Validation Issue
+
+Templates which have prebuilds configured will require a configured preset defined, with ALL of the required parameters
+set in the preset. If any of these are missing, or any of the parameters - as defined - fail validation, then the prebuilds
+subsystem will refuse to attempt a workspace build.
+
+Consult the coderd logs for more information; look out for errors or warnings from the prebuilds subsystem.
+
+### Template Misconfiguration or Error
+
+Prebuilt workspaces cannot be provisioned due to some issue at `terraform apply`-time. This could be due to misconfigured
+cloud resources, improper authorization, or any number of other issues.
+
+Visit the Workspaces page, change the search term to `owner:prebuilds`, and view the previously failed builds. The
+error will likely be quite obvious.
+
+### Provisioner Latency
+
+If your provisioners are overloaded and cannot process provisioner jobs quickly enough, prebuilt workspaces may be affected.
+There is no prioritization at present for prebuilt workspace jobs.
+
+Ensure your provisioners are appropriately resourced (i.e. you have enough instances) to handle the concurrent build demand.
+
+### Use of Workspace Tags
+
+If you are using `coder_workspace_tags` ([docs](https://coder.com/docs/admin/templates/extending-templates/workspace-tags))
+in your template, chances are you do not have any provisioners running or they are under-resourced (see **Provisioner Latency**).
+
+Ensure your running provisioners are configured with your desired tags.
+
+### Reconciliation Loop Issue
+
+The prebuilds subsystem runs a _reconciliation loop_ which monitors the state of prebuilt workspaces to ensure the desired
+number of instances are present at all times. Workspace Prebuilds is currently a BETA feature and so there could be a bug
+in this _reconciliation loop_, which should be reported to Coder.
+
+Examine your coderd logs for any errors or warnings relating to prebuilds.
\ No newline at end of file
diff --git a/coder-observability/runbooks/postgres.md b/coder-observability/runbooks/postgres.md
new file mode 100644
index 0000000..155d848
--- /dev/null
+++ b/coder-observability/runbooks/postgres.md
@@ -0,0 +1,44 @@
+# Postgres Runbooks
+
+## PostgresNotificationQueueFillingUp
+
+Postgres offers asynchronous notification via the `LISTEN` and `NOTIFY`
+commands. Coder depends heavily on this async notification mechanism for routine
+functionality.
+
+If the notification queue is filling up, this may be due to a session executing
+`LISTEN` and then entering a long transaction. To verify:
+
+- Check active sessions with `SELECT * FROM pg_stat_activity;`,
+- Check the database log for the PID of the session that is preventing cleanup,
+- Kill the query: `SELECT pg_terminate_backend(<pid>);`
+
+For more information, see the PostgreSQL documentation available here:
+
+- [PostgreSQL documentation on `LISTEN`](https://www.postgresql.org/docs/current/sql-listen.html)
+- [PostgreSQL documentation on `NOTIFY`](https://www.postgresql.org/docs/current/sql-notify.html)
+
+## PostgresDown
+
+Postgres is not currently running, which means the Coder control plane will not be able to read or write any state.
+Workspaces may continue to work normally but it is recommended to get Postgres back up as quickly as possible.
+
+## PostgresConnectionsRunningLow
+
+PostgreSQL has a `max_connections` setting that determines the maximum number of
+concurrent connections. Once this connection limit is reached, no new
+connections will be possible.
+
+To increase the maximum number of concurrent connections, update the `max_connections`
+configuration option for your PostgreSQL instance. See the PostgreSQL
+documentation for more details.
+
+**Note:** You may also need to adjust `shared_buffers` after increasing
+`max_connections`. Additionally, you may also need to adjust the kernel
+configuration value `kernel.shmmax` in `/etc/sysctl.conf` /
+`/etc/sysctl.conf.d`.
+
+For more information, see:
+
+- [PostgreSQL Documentation: Server Configuration](https://www.postgresql.org/docs/16/runtime-config-file-locations.html)
+- [Tuning your PostgreSQL Server](https://wiki.postgresql.org/wiki/Tuning_Your_PostgreSQL_Server)
diff --git a/coder-observability/runbooks/provisionerd.md b/coder-observability/runbooks/provisionerd.md
new file mode 100644
index 0000000..9cb0e84
--- /dev/null
+++ b/coder-observability/runbooks/provisionerd.md
@@ -0,0 +1,9 @@
+# Provisionerd Runbooks
+
+## ProvisionerdReplicas
+
+One or more Provisioner replicas are down. Workspace builds may be queued and processed more slowly.
+
+To resolve this issue, review the Coder deployment (Coder provisioner pods)
+for possible `CrashLoopBackOff` instances or re-adjust alarm levels based on the actual
+number of replicas.
diff --git a/coder-observability/templates/_collector-config.tpl b/coder-observability/templates/_collector-config.tpl
new file mode 100644
index 0000000..555065c
--- /dev/null
+++ b/coder-observability/templates/_collector-config.tpl
@@ -0,0 +1,337 @@
+{{/*
+Renders the Grafana Agent (River) configuration: relabelling of discovered
+Kubernetes pods for logs and metrics, log processing, metric scraping, and the
+Loki / Prometheus write endpoints. The logging and discovery sections are
+inserted as-is from the "grafana-agent" values.
+*/}}
+{{- define "collector-config" -}}
+{{ $agent := (index .Values "grafana-agent") }}
+
+{{ $agent.logging }}
+{{ $agent.discovery }}
+
+discovery.relabel "pod_logs" {
+  targets = discovery.kubernetes.pods.targets
+  {{ $agent.commonRelabellings | nindent 2 }}
+  // Point __path__ at the container's log files: the pod UID and container name
+  // are joined with "/" and substituted for $1 in the glob below.
+  rule {
+    action        = "replace"
+    source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"]
+    separator     = "/"
+    target_label  = "__path__"
+    replacement   = "/var/log/pods/*$1/*.log"
+  }
+  // Keep the scheme-like prefix of the container ID (the part before "://") in a
+  // temporary label; loki.process uses it to pick the right log format parser.
+  rule {
+    action        = "replace"
+    source_labels = ["__meta_kubernetes_pod_container_id"]
+    regex         = "^(\\w+):\\/\\/.+$"
+    target_label  = "tmp_container_runtime"
+    replacement   = "$1"
+  }
+  {{- if $agent.podLogsRelabelRules -}}
+  {{ $agent.podLogsRelabelRules | trim | nindent 2 }}
+  {{- end }}
+}
+
+// Selects the pods to scrape and derives scheme, metrics path and address from
+// the prometheus.io/* pod annotations.
+discovery.relabel "pod_metrics" {
+  targets = discovery.kubernetes.pods.targets
+  {{ $agent.commonRelabellings | nindent 2 }}
+  // drop ports that do not expose Prometheus metrics, but might otherwise be exposed by a container which *also*
+  // exposes an HTTP port which exposes metrics
+  rule {
+    source_labels = ["__meta_kubernetes_pod_container_port_name"]
+    regex         = "grpc|http-(memberlist|console)"
+    action        = "drop"
+  }
+  // adapted from the Prometheus helm chart
+  // https://github.com/prometheus-community/helm-charts/blob/862870fc3c847e32479b509e511584d5283126a3/charts/prometheus/values.yaml#L1070
+  // only keep pods annotated with prometheus.io/scrape: "true"
+  rule {
+    source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_scrape"]
+    action        = "keep"
+    regex         = "true"
+  }
+  rule {
+    source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_scheme"]
+    action        = "replace"
+    regex         = "(https?)"
+    target_label  = "__scheme__"
+  }
+  rule {
+    source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_path"]
+    action        = "replace"
+    target_label  = "__metrics_path__"
+    regex         = "(.+)"
+  }
+  // IPv6 pod IP: the address is written as [ip]:port
+  rule {
+    source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_port", "__meta_kubernetes_pod_ip"]
+    action        = "replace"
+    regex         = "(\\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})"
+    replacement   = "[$2]:$1"
+    target_label  = "__address__"
+  }
+  // IPv4 pod IP: the address is written as ip:port
+  rule {
+    source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_port", "__meta_kubernetes_pod_ip"]
+    action        = "replace"
+    regex         = "(\\d+);((([0-9]+?)(\\.|$)){4})"
+    replacement   = "$2:$1"
+    target_label  = "__address__"
+  }
+  {{- if $agent.podMetricsRelabelRules -}}
+  {{ $agent.podMetricsRelabelRules | trim | nindent 2 }}
+  {{- end }}
+}
+
+// Resolve the __path__ patterns produced by discovery.relabel "pod_logs".
+local.file_match "pod_logs" {
+  path_targets = discovery.relabel.pod_logs.output
+}
+
+// Read the matched files and pass the entries to the processing pipeline.
+loki.source.file "pod_logs" {
+  forward_to = [loki.process.pod_logs.receiver]
+  targets    = local.file_match.pod_logs.targets
+}
+
+// Parses container-runtime log framing, then extracts timestamp, level and
+// logger from Coder log lines before forwarding everything to Loki.
+loki.process "pod_logs" {
+  stage.match {
+    selector = "{tmp_container_runtime=\"containerd\"}"
+    // the cri processing stage extracts the following k/v pairs: log, stream, time, flags
+    stage.cri {}
+    // Set the extract flags and stream values as labels
+    stage.labels {
+      values = {
+        flags  = "",
+        stream = "",
+      }
+    }
+  }
+
+  // if the label tmp_container_runtime from above is docker parse using docker
+  stage.match {
+    selector = "{tmp_container_runtime=\"docker\"}"
+    // the docker processing stage extracts the following k/v pairs: log, stream, time
+    stage.docker {}
+
+    // Set the extract stream value as a label
+    stage.labels {
+      values = {
+        stream = "",
+      }
+    }
+  }
+
+  // drop the temporary container runtime label as it is no longer needed
+  stage.label_drop {
+    values = ["tmp_container_runtime"]
+  }
+
+  // parse Coder logs and extract level & logger for efficient filtering
+  stage.match {
+    selector = "{pod=~\"coder.*\"}" // TODO: make configurable
+
+    // a new entry starts with a "YYYY-MM-DD hh:mm:ss.mmm" timestamp; anything else
+    // is folded into the previous entry
+    stage.multiline {
+      firstline     = {{ printf `^(?P<ts>\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}\.\d{3})` | quote }}
+      max_wait_time = "10s"
+    }
+
+    // the named groups must match the keys used below: "ts" feeds stage.timestamp,
+    // "level" and "logger" feed stage.labels
+    stage.regex {
+      expression = {{ printf `^(?P<ts>\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}\.\d{3})\s\[(?P<level>\w+)\]\s\s(?P<logger>[^:]+):\s(?P<message>.+)` | quote }}
+    }
+
+    stage.timestamp {
+      source            = "ts"
+      format            = "2006-01-02 15:04:05.000"
+      action_on_failure = "fudge" // rather have inaccurate time than drop the log line
+    }
+
+    stage.labels {
+      values = {
+        level  = "",
+        logger = "",
+      }
+    }
+  }
+
+  forward_to = [loki.write.loki.receiver]
+}
+{{/* Optional extra River blocks from the "grafana-agent" values, inserted as-is. */}}
+{{ if $agent.extraBlocks -}}
+{{ $agent.extraBlocks }}
+{{- end }}
+// Push processed log entries to the "<loki fullname>-gateway" host in the release namespace.
+loki.write "loki" {
+ endpoint {
+ url = "http://{{ include "loki.fullname" .Subcharts.loki }}-gateway.{{ .Release.Namespace }}.{{ .Values.global.zone }}/loki/api/v1/push"
+ }
+}
+
+// Scrape the targets produced by discovery.relabel "pod_metrics"; samples pass
+// through prometheus.relabel "pods" before being written.
+// Interval and timeout come from global.telemetry.metrics.
+prometheus.scrape "pods" {
+ targets = discovery.relabel.pod_metrics.output
+ forward_to = [prometheus.relabel.pods.receiver]
+
+ scrape_interval = "{{ .Values.global.telemetry.metrics.scrape_interval }}"
+ scrape_timeout = "{{ .Values.global.telemetry.metrics.scrape_timeout }}"
+}
+
+// The rules here act on scraped samples (metric_relabel_configs), whereas the
+// discovery.relabel components act on targets (relabel_configs).
+// See https://github.com/grafana/agent/blob/main/internal/converter/internal/prometheusconvert/prometheusconvert.go#L95-L106
+prometheus.relabel "pods" {
+  forward_to = [prometheus.remote_write.default.receiver]
+
+  // Step 1: on kube_pod* series, blank the container / pod / namespace labels,
+  // which clash with the labels kube-state-metrics reports (exported_*).
+  rule {
+    source_labels = ["__name__", "container"]
+    regex         = "kube_pod.+;(.+)"
+    replacement   = ""
+    target_label  = "container"
+  }
+  rule {
+    source_labels = ["__name__", "pod"]
+    regex         = "kube_pod.+;(.+)"
+    replacement   = ""
+    target_label  = "pod"
+  }
+  rule {
+    source_labels = ["__name__", "namespace"]
+    regex         = "kube_pod.+;(.+)"
+    replacement   = ""
+    target_label  = "namespace"
+  }
+
+  // Step 2: copy the exported_* values into their place. The (.+) group needs at
+  // least one character, so an empty exported_* label never overwrites anything.
+  rule {
+    source_labels = ["__name__", "exported_container"]
+    regex         = "^kube_pod.+;(.+)$"
+    replacement   = "$1"
+    target_label  = "container"
+  }
+  rule {
+    source_labels = ["__name__", "exported_pod"]
+    regex         = "^kube_pod.+;(.+)$"
+    replacement   = "$1"
+    target_label  = "pod"
+  }
+  rule {
+    source_labels = ["__name__", "exported_namespace"]
+    regex         = "^kube_pod.+;(.+)$"
+    replacement   = "$1"
+    target_label  = "namespace"
+  }
+
+  // Step 3: remove the labels that are no longer wanted on any series.
+  rule {
+    action = "labeldrop"
+    regex  = "^(exported_.*|image_.*|container_id|id|uid)$"
+  }
+}
+
+// Point every discovered node target at the /metrics/cadvisor path.
+discovery.relabel "cadvisor" {
+ targets = discovery.kubernetes.nodes.targets
+ rule {
+ replacement = "/metrics/cadvisor"
+ target_label = "__metrics_path__"
+ }
+}
+
+// Scrape the cadvisor targets over HTTPS, authenticating with the pod's
+// service-account token.
+prometheus.scrape "cadvisor" {
+ targets = discovery.relabel.cadvisor.output
+ forward_to = [ prometheus.relabel.cadvisor.receiver ]
+ scheme = "https"
+ tls_config {
+ // NOTE(review): certificate verification is disabled for these scrapes — confirm this is intended
+ insecure_skip_verify = true
+ }
+ bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token"
+ scrape_interval = "{{ .Values.global.telemetry.metrics.scrape_interval }}"
+ scrape_timeout = "{{ .Values.global.telemetry.metrics.scrape_timeout }}"
+}
+
+// Clean-up applied to cadvisor samples before they are written.
+prometheus.relabel "cadvisor" {
+  forward_to = [ prometheus.remote_write.default.receiver ]
+
+  // Series with an empty "container" label are dropped,
+  // see https://github.com/google/cadvisor/issues/2688
+  rule {
+    action        = "drop"
+    source_labels = ["__name__","container"]
+    separator     = "@"
+    regex         = "(container_cpu_.*|container_fs_.*|container_memory_.*)@"
+  }
+  // Series with an empty "image" label are dropped,
+  // see https://github.com/google/cadvisor/issues/2688
+  rule {
+    action        = "drop"
+    source_labels = ["__name__","image"]
+    separator     = "@"
+    regex         = "(container_cpu_.*|container_fs_.*|container_memory_.*|container_network_.*)@"
+  }
+  // Series whose container is exactly "POD" are not relevant
+  rule {
+    action        = "drop"
+    source_labels = ["container"]
+    regex         = "^POD$"
+  }
+  // Labels that are not needed are blanked out
+  rule {
+    source_labels = ["id"]
+    replacement   = ""
+    target_label  = "id"
+  }
+  rule {
+    source_labels = ["job"]
+    replacement   = ""
+    target_label  = "job"
+  }
+  rule {
+    source_labels = ["name"]
+    replacement   = ""
+    target_label  = "name"
+  }
+}
+
+// Write all collected samples to the Prometheus server of this release.
+prometheus.remote_write "default" {
+ endpoint {
+ url ="http://{{ include "prometheus.server.fullname" .Subcharts.prometheus }}.{{ .Release.Namespace }}.{{ .Values.global.zone }}/api/v1/write"
+
+ // drop instance label which unnecessarily adds new series when pods are restarted, since pod IPs are dynamically assigned
+ // NOTE: "__address__" is mapped to "instance", so it will contain <host>:<port>
+ write_relabel_config {
+ regex = "instance"
+ action = "labeldrop"
+ }
+ }
+}
+
+{{- /*
+Optional OTLP receiver (gRPC :4317, HTTP :4318). Metrics and logs are batched,
+then exported to the same Prometheus remote_write and Loki write components as
+the scraped data.
+The "if" must not trim the whitespace that follows it: otherwise the first
+block below is rendered on the same line as the closing brace of the
+preceding component.
+*/}}
+{{- if $agent.withOTLPReceiver }}
+otelcol.receiver.otlp "otlp_receiver" {
+  grpc {
+    endpoint = "0.0.0.0:4317"
+  }
+  http {
+    endpoint = "0.0.0.0:4318"
+  }
+  output {
+    metrics = [otelcol.processor.batch.default.input]
+    logs    = [otelcol.processor.batch.default.input]
+  }
+}
+otelcol.exporter.prometheus "to_prometheus" {
+  forward_to = [
+    prometheus.remote_write.default.receiver,
+  ]
+}
+otelcol.exporter.loki "to_loki" {
+  forward_to = [
+    loki.write.loki.receiver,
+  ]
+}
+otelcol.processor.batch "default" {
+  output {
+    metrics = [otelcol.exporter.prometheus.to_prometheus.input]
+    logs    = [otelcol.exporter.loki.to_loki.input]
+  }
+}
+{{- end -}}
+
+{{ with .Values.global.coder.scrapeMetrics }}
+// Static scrape of a single Coder endpoint, rendered only when
+// global.coder.scrapeMetrics is set. additionalLabels are attached to the target;
+// the trailing comma emitted by "collector-labels" is stripped with trimSuffix.
+prometheus.scrape "coder_metrics" {
+ targets = [
+ {"__address__" = "{{ .hostname }}:{{ .port }}", {{ include "collector-labels" .additionalLabels | trimSuffix "," }}},
+ ]
+
+ forward_to = [prometheus.remote_write.default.receiver]
+ scrape_interval = "{{ .scrapeInterval }}"
+}
+{{- end }}
+{{- end }}
+
+{{/*
+Renders every entry of the given map as `key = "value",`. Each entry, including
+the last, ends with a comma; callers remove the final one themselves.
+*/}}
+{{- define "collector-labels" -}}
+{{- range $key, $val := . -}}
+{{ $key }} = "{{ $val }}",
+{{- end -}}
+{{ end }}
\ No newline at end of file
diff --git a/coder-observability/templates/_helpers.tpl b/coder-observability/templates/_helpers.tpl
index 5d06184..0d8578d 100644
--- a/coder-observability/templates/_helpers.tpl
+++ b/coder-observability/templates/_helpers.tpl
@@ -61,23 +61,64 @@ Create the name of the service account to use
{{- end }}
{{- end }}
-{{/*
-Create the name of the service account to use
-*/}}
-{{- define "coder-observability.datasources" -}}
-apiVersion: 1
-datasources:
- - name: prometheus
- type: prometheus
- url: http://prometheus-server.monitoring.svc.cluster.local
- access: proxy
- isDefault: true
- editable: false
- - name: loki
- type: loki
- url: http://loki-gateway.monitoring.svc.cluster.local
- access: proxy
- isDefault: false
- editable: false
+{{/*
+Postgres connector string, built from .Values.global.postgres.
+- postgres.password set: the URL-escaped password is embedded after the username.
+- only postgres.mountSecret set: the URI carries the username alone.
+- neither set: rendering fails.
+sslrootcert is appended only when sslmode is not "disable" and a value is given.
+*/}}
+{{- define "postgres-connector-string" -}}
+{{- $pg := .Values.global.postgres -}}
+{{- if not (or $pg.password $pg.mountSecret) -}}
+{{- fail "either postgres.password or postgres.mountSecret must be defined" -}}
+{{- end -}}
+postgresql://{{ $pg.username }}
+{{- if $pg.password }}:{{ urlquery $pg.password }}{{ end -}}
+@{{ $pg.hostname }}:{{ $pg.port }}/{{ $pg.database }}?sslmode={{ $pg.sslmode }}
+{{- if and (ne $pg.sslmode "disable") $pg.sslrootcert -}}
+&sslrootcert={{ $pg.sslrootcert }}
+{{- end -}}
+{{- end }}
+
+{{/*
+Postgres secret mount: emits an envFrom / secretRef stanza naming
+postgres.mountSecret. When postgres.mountSecret is unset only whitespace is
+rendered.
+*/}}
+{{- define "postgres-secret-mount" -}}
+{{ if .Values.global.postgres.mountSecret }}
+envFrom:
+ - secretRef:
+ name: {{ .Values.global.postgres.mountSecret }}
+{{ end }}
+{{- end }}
+
+{{/* Postgres Exporter does not export a pubsub usage metric by default, so we add one */}}
+{{- define "postgres-pubsub-queue-usage-metric-name" -}}pg_pubsub_usage{{- end }}
+
+{{/*
+Build a runbook URL of the form
+<externalScheme>://runbook-viewer.<release namespace>.<externalZone>/<service>#<alert, lower-cased>
+The context passed in must carry "service" and "alert" keys in addition to the
+usual .Values and .Release.
+*/}}
+{{- define "runbook-url" -}}
+{{ $outer := . }}
+{{- with .Values.global -}}
+ {{- .externalScheme }}://runbook-viewer.{{ $outer.Release.Namespace }}.{{ .externalZone }}/{{- $outer.service }}#{{- $outer.alert | lower }}
+{{- end }}
{{- end }}
+{{/*
+Label-matcher fragments placed between the braces of metric selectors.
+coderd / provisionerd: the configured selector plus a namespace matcher.
+workspaces: the configured selector as-is.
+non-workspace: a regex matcher on the control-plane and external-provisioner namespaces.
+*/}}
+{{- define "coderd-selector" -}} {{- printf "%s, namespace=`%s`" .Values.global.coder.coderdSelector .Values.global.coder.controlPlaneNamespace -}} {{- end }}
+{{- define "provisionerd-selector" -}} {{- printf "%s, namespace=`%s`" .Values.global.coder.provisionerdSelector .Values.global.coder.externalProvisionersNamespace -}} {{- end }}
+{{- define "workspaces-selector" -}} {{- .Values.global.coder.workspacesSelector -}} {{- end }}
+{{- define "non-workspace-selector" -}} {{- printf "namespace=~`(%s|%s)`" (include "control-plane-namespace" .) (include "external-provisioners-namespace" .) -}} {{- end }}
+{{- define "control-plane-namespace" -}} {{- .Values.global.coder.controlPlaneNamespace -}} {{- end }}
+{{- define "external-provisioners-namespace" -}} {{- .Values.global.coder.externalProvisionersNamespace -}} {{- end }}
+
+{{/* The collector creates "job" labels in the form <namespace>/<name>/<component> */}}
+
+{{/* Prometheus job label: <release namespace>/<prometheus.server.fullnameOverride>/<prometheus.server.name> */}}
+{{- define "prometheus-job" -}} {{- printf "%s/%s/%s" .Release.Namespace .Values.prometheus.server.fullnameOverride .Values.prometheus.server.name -}} {{- end }}
+{{/* Loki job label: <release namespace>/<loki.fullnameOverride> (two segments only) */}}
+{{- define "loki-job" -}} {{- printf "%s/%s" .Release.Namespace .Values.loki.fullnameOverride -}} {{- end }}
+{{/* Grafana Agent job label: <release namespace>/<grafana-agent fullnameOverride>/grafana-agent */}}
+{{- define "grafana-agent-job" -}} {{- printf "%s/%s/%s" .Release.Namespace (index .Values "grafana-agent").fullnameOverride "grafana-agent" -}} {{- end }}
+
+{{/* Dashboard time range and refresh values, read from global.dashboards */}}
+{{- define "dashboard-range" -}} {{ .Values.global.dashboards.timerange }} {{- end }}
+{{- define "dashboard-refresh" -}} {{ .Values.global.dashboards.refresh }} {{- end }}
\ No newline at end of file
diff --git a/coder-observability/templates/configmap-collector.yaml b/coder-observability/templates/configmap-collector.yaml
index f3a6dc2..919b089 100644
--- a/coder-observability/templates/configmap-collector.yaml
+++ b/coder-observability/templates/configmap-collector.yaml
@@ -2,225 +2,7 @@
kind: ConfigMap
apiVersion: v1
metadata:
- name: {{ .Values.collector.agent.configMap.name }}
+ name: {{ (index .Values "grafana-agent").agent.configMap.name }}
namespace: {{ .Release.Namespace }}
data:
- config.river: |
- logging {
- level = "debug"
- format = "logfmt"
- }
-
- // read the credentials secret for remote_write authorization
- // remote.kubernetes.secret "credentials" {
- // namespace = "monitoring"
- // name = "primary-credentials-logs"
- // }
-
- discovery.kubernetes "pods" {
- role = "pod"
- selectors {
- role = "pod"
- }
- }
-
- discovery.relabel "pod_logs" {
- targets = discovery.kubernetes.pods.targets
- rule {
- source_labels = ["__meta_kubernetes_namespace"]
- target_label = "namespace"
- }
- rule {
- source_labels = ["__meta_kubernetes_pod_name"]
- target_label = "pod"
- }
- rule {
- source_labels = ["__meta_kubernetes_pod_container_name"]
- target_label = "container"
- }
- rule {
- source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_kubernetes_pod_label_app_kubernetes_io_component"]
- separator = "/"
- target_label = "job"
- }
- rule {
- source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"]
- separator = "/"
- action = "replace"
- replacement = "/var/log/pods/*$1/*.log"
- target_label = "__path__"
- }
- rule {
- action = "replace"
- source_labels = ["__meta_kubernetes_pod_container_id"]
- regex = "^(\\w+):\\/\\/.+$"
- replacement = "$1"
- target_label = "tmp_container_runtime"
- }
- }
-
- // TODO: share common relabelings
- discovery.relabel "pod_metrics" {
- targets = discovery.kubernetes.pods.targets
-
- rule {
- source_labels = ["__meta_kubernetes_namespace"]
- target_label = "namespace"
- }
- rule {
- source_labels = ["__meta_kubernetes_pod_name"]
- target_label = "pod"
- }
- rule {
- source_labels = ["__meta_kubernetes_pod_container_name"]
- target_label = "container"
- }
- rule {
- source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_kubernetes_pod_label_app_kubernetes_io_component"]
- separator = "/"
- target_label = "job"
- action = "replace"
- }
-
- // adapted from the Prometheus helm chart
- // https://github.com/prometheus-community/helm-charts/blob/862870fc3c847e32479b509e511584d5283126a3/charts/prometheus/values.yaml#L1070
- rule {
- source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_scrape"]
- action = "keep"
- regex = "true"
- }
-
- rule {
- source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_scheme"]
- action = "replace"
- regex = "(https?)"
- target_label = "__scheme__"
- }
-
- rule {
- source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_path"]
- action = "replace"
- target_label = "__metrics_path__"
- regex = "(.+)"
- }
-
- rule {
- source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_port", "__meta_kubernetes_pod_ip"]
- action = "replace"
- regex = "(\\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})"
- replacement = "[$2]:$1"
- target_label = "__address__"
- }
-
- rule {
- source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_port", "__meta_kubernetes_pod_ip"]
- action = "replace"
- regex = "(\\d+);((([0-9]+?)(\\.|$)){4})"
- replacement = "$2:$1"
- target_label = "__address__"
- }
-
- rule {
- action = "labelmap"
- regex = "__meta_kubernetes_pod_annotation_prometheus_io_param_(.+)"
- replacement = "__param_$1"
- }
-
- rule {
- action = "labelmap"
- regex = "__meta_kubernetes_pod_label_(.+)"
- }
-
- rule {
- source_labels = ["__meta_kubernetes_namespace"]
- action = "replace"
- target_label = "namespace"
- }
-
- rule {
- source_labels = ["__meta_kubernetes_pod_name"]
- action = "replace"
- target_label = "pod"
- }
-
- rule {
- source_labels = ["__meta_kubernetes_pod_phase"]
- regex = "Pending|Succeeded|Failed|Completed"
- action = "drop"
- }
-
- rule {
- source_labels = ["__meta_kubernetes_pod_node_name"]
- action = "replace"
- target_label = "node"
- }
- }
-
- local.file_match "pod_logs" {
- path_targets = discovery.relabel.pod_logs.output
- }
-
- loki.source.file "pod_logs" {
- targets = local.file_match.pod_logs.targets
- forward_to = [loki.process.pod_logs.receiver]
- }
-
- // basic processing to parse the container format. You can add additional processing stages
- // to match your application logs.
- loki.process "pod_logs" {
- stage.match {
- selector = "{tmp_container_runtime=\"containerd\"}"
- // the cri processing stage extracts the following k/v pairs: log, stream, time, flags
- stage.cri {}
- // Set the extract flags and stream values as labels
- stage.labels {
- values = {
- flags = "",
- stream = "",
- }
- }
- }
-
- // if the label tmp_container_runtime from above is docker parse using docker
- stage.match {
- selector = "{tmp_container_runtime=\"docker\"}"
- // the docker processing stage extracts the following k/v pairs: log, stream, time
- stage.docker {}
-
- // Set the extract stream value as a label
- stage.labels {
- values = {
- stream = "",
- }
- }
- }
-
- // drop the temporary container runtime label as it is no longer needed
- stage.label_drop {
- values = ["tmp_container_runtime"]
- }
-
- forward_to = [loki.write.loki.receiver]
- }
-
- // TODO reference release name
- loki.write "loki" {
- endpoint {
- url = "http://{{ include "loki.fullname" .Subcharts.logs }}-gateway.{{ .Release.Namespace }}.svc.cluster.local/loki/api/v1/push"
- // basic_auth {
- // username = nonsensitive(remote.kubernetes.secret.credentials.data["username"])
- // password = remote.kubernetes.secret.credentials.data["password"]
- // }
- }
- }
-
- prometheus.scrape "pods" {
- targets = discovery.relabel.pod_metrics.output
- forward_to = [prometheus.remote_write.default.receiver]
- }
-
- prometheus.remote_write "default" {
- endpoint {
- url ="http://{{ include "prometheus.server.fullname" .Subcharts.metrics }}.{{ .Release.Namespace }}.svc.cluster.local/api/v1/write"
- }
- }
\ No newline at end of file
+ config.river: |- {{- include "collector-config" . | trim | nindent 4 }}
\ No newline at end of file
diff --git a/coder-observability/templates/configmap-prometheus-alerts.yaml b/coder-observability/templates/configmap-prometheus-alerts.yaml
new file mode 100644
index 0000000..bf9bcc4
--- /dev/null
+++ b/coder-observability/templates/configmap-prometheus-alerts.yaml
@@ -0,0 +1,256 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: metrics-alerts
+ namespace: {{ .Release.Namespace }}
+data:
+ {{- $service := dict "service" "coderd" -}}
+
+ {{- with .Values.global.coder.alerts.coderd }} {{/* start-section */}}
+ coderd.yaml: |-
+ groups:
+ {{- /* CPU: per-pod CPU usage rate over the group's period, as a fraction of the CPU limit. One rule per severity/threshold pair; rendered only when the group is enabled. */}}
+ {{- with .groups.CPU }}
+ {{- $group := . }}
+ {{- if .enabled }}
+ - name: CPU Usage
+ rules:
+ {{ $alert := "CoderdCPUUsage" }}
+ {{- range $severity, $threshold := .thresholds }}
+ - alert: {{ $alert }}
+ expr: max by (pod) (rate(container_cpu_usage_seconds_total{ {{- include "coderd-selector" $ -}} }[{{- $group.period -}}])) / max by(pod) (kube_pod_container_resource_limits{ {{- include "coderd-selector" $ -}}, resource="cpu"}) > {{ $threshold }}
+ for: {{ $group.delay }}
+ annotations:
+ summary: The Coder instance {{ `{{ $labels.pod }}` }} is using high amounts of CPU, which may impact application performance.
+ labels:
+ severity: {{ $severity }}
+ runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }}
+ {{- end }}
+ {{- end }}
+ {{- end }}
+
+ {{- /* Memory: per-pod working-set bytes as a fraction of the memory limit. One rule per severity/threshold pair; rendered only when the group is enabled. */}}
+ {{- with .groups.Memory }}
+ {{- $group := . }}
+ {{- if .enabled }}
+ - name: Memory Usage
+ rules:
+ {{ $alert := "CoderdMemoryUsage" }}
+ {{- range $severity, $threshold := .thresholds }}
+ - alert: {{ $alert }}
+ expr: max by (pod) (container_memory_working_set_bytes{ {{- include "coderd-selector" $ -}} }) / max by (pod) (kube_pod_container_resource_limits{ {{- include "coderd-selector" $ -}}, resource="memory"}) > {{ $threshold }}
+ for: {{ $group.delay }}
+ annotations:
+ summary: The Coder instance {{ `{{ $labels.pod }}` }} is using high amounts of memory, which may lead to an Out-Of-Memory (OOM) error.
+ labels:
+ severity: {{ $severity }}
+ runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }}
+ {{- end }}
+ {{- end }}
+ {{- end }}
+
+ {{- /* Restarts: per-pod increase of the container restart counter over the group's period. One rule per severity/threshold pair; rendered only when the group is enabled. */}}
+ {{- with .groups.Restarts }}
+ {{- $group := . }}
+ {{- if .enabled }}
+ - name: Pod Restarts
+ rules:
+ {{ $alert := "CoderdRestarts" }}
+ {{- range $severity, $threshold := .thresholds }}
+ - alert: {{ $alert }}
+ expr: sum by(pod) (increase(kube_pod_container_status_restarts_total{ {{- include "coderd-selector" $ -}} }[{{- $group.period -}}])) > {{ $threshold }}
+ for: {{ $group.delay }}
+ annotations:
+ summary: The Coder instance {{ `{{ $labels.pod }}` }} has restarted multiple times in the last {{ $group.period -}}, which may indicate a CrashLoop.
+ labels:
+ severity: {{ $severity }}
+ runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }}
+ {{- end }}
+ {{- end }}
+ {{- end }}
+
+ {{- /* Replicas: fires when the sum of "up" over the coderd selector falls below the threshold. One rule per severity/threshold pair; rendered only when the group is enabled. */}}
+ {{- with .groups.Replicas }}
+ {{- $group := . }}
+ {{- if .enabled }}
+ - name: Coderd Replicas
+ rules:
+ {{ $alert := "CoderdReplicas" }}
+ {{- range $severity, $threshold := .thresholds }}
+ - alert: {{ $alert }}
+ expr: sum(up{ {{- include "coderd-selector" $ -}} }) < {{ $threshold }}
+ for: {{ $group.delay }}
+ annotations:
+ summary: Number of alive coderd replicas is below the threshold = {{ $threshold -}}.
+ labels:
+ severity: {{ $severity }}
+ runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }}
+ {{- end }}
+ {{- end }}
+ {{- end }}
+
+ {{- /* WorkspaceBuildFailures: increase of workspace builds with status="failed" over the group's period. One rule per severity/threshold pair; rendered only when the group is enabled. */}}
+ {{- with .groups.WorkspaceBuildFailures }}
+ {{- $group := . }}
+ {{- if .enabled }}
+ - name: Coderd Workspace Build Failures
+ rules:
+ {{ $alert := "CoderdWorkspaceBuildFailures" }}
+ {{- range $severity, $threshold := .thresholds }}
+ - alert: {{ $alert }}
+ expr: sum(increase(coderd_workspace_builds_total{ {{- include "coderd-selector" $ -}} , status="failed" }[{{- $group.period -}}])) > {{ $threshold }}
+ for: {{ $group.delay }}
+ annotations:
+ summary: Workspace builds have failed multiple times in the last {{ $group.period -}}, which may indicate a broken Coder template.
+ labels:
+ severity: {{ $severity }}
+ runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }}
+ {{- end }}
+ {{- end }}
+ {{- end }}
+
+ {{- /* IneligiblePrebuilds: running minus eligible prebuilt workspaces, per template and preset. NOTE(review): the expression compares against 0 and does not use $threshold, so every severity renders the same condition; confirm this is intended. */}}
+ {{- with .groups.IneligiblePrebuilds }}
+ {{- $group := . }}
+ {{- if .enabled }}
+ - name: Coderd Ineligible Prebuilds
+ rules:
+ {{ $alert := "CoderdIneligiblePrebuilds" }}
+ {{- range $severity, $threshold := .thresholds }}
+ - alert: {{ $alert }}
+ expr: max by (template_name, preset_name) (coderd_prebuilt_workspaces_running - coderd_prebuilt_workspaces_eligible) > 0
+ for: {{ $group.delay }}
+ annotations:
+ summary: >
+ {{ `{{ $value }}` }} prebuilt workspace(s) are currently ineligible for claiming for the "{{ `{{ $labels.template_name }}` }}" template and "{{ `{{ $labels.preset_name }}` }}" preset.
+ This usually indicates that the agent has not started correctly, or is still running its startup scripts after an extended period of time.
+ labels:
+ severity: {{ $severity }}
+ runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }}
+ {{- end }}
+ {{- end }}
+ {{- end }}
+
+    {{- /* UnprovisionedPrebuiltWorkspaces: desired minus running prebuilt workspaces, per template and preset. The expression compares against 0; $threshold is not used. */}}
+    {{- with .groups.UnprovisionedPrebuiltWorkspaces }}
+    {{- $group := . }}
+    {{- if .enabled }}
+    - name: Coderd Unprovisioned Prebuilt Workspaces
+      rules:
+      {{ $alert := "CoderdUnprovisionedPrebuiltWorkspaces" }}
+      {{- range $severity, $threshold := .thresholds }}
+      - alert: {{ $alert }}
+        expr: max by (template_name, preset_name) (coderd_prebuilt_workspaces_desired - coderd_prebuilt_workspaces_running) > 0
+        for: {{ $group.delay }}
+        annotations:
+          summary: >
+            {{ `{{ $value }}` }} prebuilt workspace(s) have not yet been provisioned for the "{{ `{{ $labels.template_name }}` }}" template and "{{ `{{ $labels.preset_name }}` }}" preset.
+        labels:
+          severity: {{ $severity }}
+          runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }}
+      {{- end }}
+    {{- end }}
+    {{- end }}
+
+ {{- end }} {{/* end-section */}}
+
+
+  {{- /* The runbook for these alerts is the "provisionerd" one, so the service used by runbook-url must be switched before this section. */}}
+  {{- $service = dict "service" "provisionerd" -}}
+  {{- with .Values.global.coder.alerts.provisionerd }} {{/* start-section */}}
+  provisionerd.yaml: |-
+    groups:
+    {{- /* Replicas: fires when the number of provisioner daemons reported through the coderd selector falls below the threshold; one rule per severity. */}}
+    {{- with .groups.Replicas }}
+    {{- $group := . }}
+    {{- if .enabled }}
+    - name: Provisionerd Replicas
+      rules:
+      {{ $alert := "ProvisionerdReplicas" }}
+      {{- range $severity, $threshold := .thresholds }}
+      - alert: {{ $alert }}
+        expr: sum(coderd_provisionerd_num_daemons{ {{- include "coderd-selector" $ -}} }) < {{ $threshold }}
+        for: {{ $group.delay }}
+        annotations:
+          summary: Number of alive provisionerd replicas is below the threshold = {{ $threshold -}}.
+        labels:
+          severity: {{ $severity }}
+          runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }}
+      {{- end }}
+    {{- end }}
+    {{- end }}
+
+  {{- end }} {{/* end-section */}}
+
+
+ {{- /* Enterprise alerts. Licences: ratio of active users to the licensed user limit, compared with each threshold; one rule per severity, rendered only when the group is enabled. */}}
+ {{- $service = dict "service" "enterprise" -}}
+
+ {{- with .Values.global.coder.alerts.enterprise }} {{/* start-section */}}
+ enterprise.yaml: |-
+ groups:
+ {{- with .groups.Licences }}
+ {{- $group := . }}
+ {{- if .enabled }}
+ - name: Licences
+ rules:
+ {{ $alert := "CoderLicenseSeats" }}
+ {{- range $severity, $threshold := .thresholds }}
+ - alert: {{ $alert }}
+ expr: 'max(coderd_license_active_users) / max(coderd_license_limit_users) >= {{- $threshold }}'
+ for: {{ $group.delay }}
+ annotations:
+ summary: Your Coder enterprise licence usage is now at {{ `{{ $value | humanizePercentage }}` }} capacity.
+ labels:
+ severity: {{ $severity }}
+ runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }}
+ {{- end }}
+ {{- end }}
+ {{- end }}
+ {{- end }} {{/* end-section */}}
+
+  {{- /* Postgres alerts, configured under global.postgres.alerts.groups. */}}
+  {{- $service = dict "service" "postgres" -}}
+  {{- with .Values.global.postgres }}
+  postgres.yaml: |-
+    groups:
+    {{- /* Notifications: usage of the notification queue (metric added by the SQL exporter) above each threshold. */}}
+    {{- with .alerts.groups.Notifications }}
+    {{- $group := . -}}
+    {{- if .enabled }}
+    - name: Notifications
+      rules:
+      {{ $alert := "PostgresNotificationQueueFillingUp" }}
+      {{- range $severity, $threshold := .thresholds }}
+      - alert: {{ $alert }}
+        expr: {{ include "postgres-pubsub-queue-usage-metric-name" . }} > {{ $threshold }}
+        for: {{ $group.delay }}
+        annotations:
+          summary: The postgres instance {{ `{{ $labels.instance }}` }} has a notification queue that is filling up, which may impact application performance.
+        labels:
+          severity: {{ $severity }}
+          runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }}
+      {{- end }}
+    {{- end -}}
+    {{- end -}}
+    {{- /* Basic: a single critical alert when pg_up reports 0. */}}
+    {{- with .alerts.groups.Basic }}
+    {{ $group := . -}}
+    {{- if .enabled }}
+    - name: Liveness
+      rules:
+      {{ $alert := "PostgresDown" }}
+      - alert: {{ $alert }}
+        expr: pg_up == 0
+        for: {{ $group.delay }}
+        annotations:
+          summary: The postgres instance {{ `{{ $labels.instance }}` }} is down!
+        labels:
+          severity: critical
+          runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }}
+    {{- end }}
+    {{ end }}
+    {{- /* Connections: active connections per database and instance above max_connections multiplied by each threshold. The summary is an annotation, as for every other alert, so it does not become part of the alert's label set. */}}
+    {{- with .alerts.groups.Connections }}
+    {{ $group := . -}}
+    {{- if .enabled }}
+    - name: Connections
+      rules:
+      {{ $alert := "PostgresConnectionsRunningLow" }}
+      {{- range $severity, $threshold := .thresholds }}
+      - alert: {{ $alert }}
+        expr: sum by (datname, instance) (pg_stat_activity_count) > on () group_left() (pg_settings_max_connections * {{ $threshold }})
+        for: {{ $group.delay }}
+        annotations:
+          summary: The postgres instance {{ `{{ $labels.instance }}` }} is running low on connections which may impact application performance.
+        labels:
+          severity: {{ $severity }}
+          runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }}
+      {{- end }}
+    {{- end -}}
+    {{- end -}}
+  {{ end }}
diff --git a/coder-observability/templates/configmap-runbooks.yaml b/coder-observability/templates/configmap-runbooks.yaml
new file mode 100644
index 0000000..80eb085
--- /dev/null
+++ b/coder-observability/templates/configmap-runbooks.yaml
@@ -0,0 +1,10 @@
+---
+# ConfigMap holding every file matched by runbooks/** in the chart.
+kind: ConfigMap
+apiVersion: v1
+metadata:
+ name: runbooks
+ namespace: {{ .Release.Namespace }}
+ annotations:
+ # SHA-256 of the rendered runbook content, so this value changes whenever a runbook changes.
+ checksum/config: {{ (.Files.Glob "runbooks/**").AsConfig | indent 2 | sha256sum }}
+data:
+{{ (.Files.Glob "runbooks/**").AsConfig | indent 2 }}
\ No newline at end of file
diff --git a/coder-observability/templates/configmap-sql-exporter.yaml b/coder-observability/templates/configmap-sql-exporter.yaml
new file mode 100644
index 0000000..08f6d9c
--- /dev/null
+++ b/coder-observability/templates/configmap-sql-exporter.yaml
@@ -0,0 +1,27 @@
+# Configuration for the SQL exporter: one Postgres target with a single
+# collector that publishes the notification-queue usage as a gauge.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: sql-exporter-config
+  namespace: {{ .Release.Namespace }}
+data:
+  config.yaml: |-
+    global:
+    target:
+      name: postgres
+      data_source_name: '{{ include "postgres-connector-string" . }}'
+      collectors:
+        - notify
+    collectors:
+      - collector_name: notify
+        metrics:
+          # Add a metric to show the current usage of the Postgres "pub/sub" mechanism
+          # See https://www.postgresql.org/docs/current/functions-info.html
+          - metric_name: {{ include "postgres-pubsub-queue-usage-metric-name" . }}
+            type: gauge
+            help: "The fraction (0–1) of the asynchronous notification queue's maximum size that is currently occupied by notifications that are waiting to be processed"
+            # Label values are quoted so that names which look like numbers or
+            # booleans are still read as strings.
+            static_labels:
+              hostname: {{ .Values.global.postgres.hostname | quote }}
+              database: {{ .Values.global.postgres.database | quote }}
+            values: [ usage ]
+            query: |
+              SELECT pg_notification_queue_usage() AS usage;
\ No newline at end of file
diff --git a/coder-observability/templates/dashboards/_dashboards_coderd.json.tpl b/coder-observability/templates/dashboards/_dashboards_coderd.json.tpl
new file mode 100644
index 0000000..20a0ece
--- /dev/null
+++ b/coder-observability/templates/dashboards/_dashboards_coderd.json.tpl
@@ -0,0 +1,1474 @@
+{{ define "coderd-dashboard.json" }}
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": {
+ "type": "grafana",
+ "uid": "-- Grafana --"
+ },
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "target": {
+ "limit": 100,
+ "matchAny": false,
+ "tags": [],
+ "type": "dashboard"
+ },
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 0,
+ "links": [],
+ "panels": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 1
+ }
+ ]
+ }
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Down"
+ },
+ "properties": [
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 1
+ }
+ ]
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 0,
+ "y": 0
+ },
+ "id": 10,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "value_and_name",
+ "wideLayout": false
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "count(up{ {{- include "coderd-selector" . -}} } == 1) or vector(0)",
+ "instant": true,
+ "legendFormat": "Up",
+ "range": false,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "(count(up{ {{- include "coderd-selector" . -}} } == 0) or vector(0)) > 0",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Down",
+ "range": false,
+ "refId": "B"
+ }
+ ],
+ "title": "Replicas",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 6,
+ "y": 0
+ },
+ "id": 18,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "One or more replicas are required to be running in order to serve the control-plane.\n\nSee [High Availability](https://coder.com/docs/v2/latest/admin/high-availability) for details on how to\nrun multiple `coderd` replicas.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "#EAB839",
+ "value": 0.9
+ },
+ {
+ "color": "red",
+ "value": 1
+ }
+ ]
+ },
+ "unit": "percentunit"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Enabled"
+ },
+ "properties": [
+ {
+ "id": "mappings",
+ "value": [
+ {
+ "options": {
+ "0": {
+ "index": 1,
+ "text": "No"
+ },
+ "1": {
+ "index": 0,
+ "text": "Yes"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ]
+ },
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ }
+ ]
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 12,
+ "y": 0
+ },
+ "id": 32,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "value_and_name",
+ "wideLayout": false
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(coderd_license_user_limit_enabled)",
+ "instant": true,
+ "legendFormat": "Enabled",
+ "range": false,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "(\n max(coderd_license_active_users) / max(coderd_license_limit_users)\n) > 0",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Usage",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Enterprise License",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 18,
+ "y": 0
+ },
+ "id": 33,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "If you would like to try Coder's [Enterprise features](https://coder.com/docs/v2/latest/enterprise), you can [request a trial license](https://coder.com/docs/v2/latest/faqs#how-do-i-add-an-enterprise-license).",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 1
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byRegexp",
+ "options": "/(Requested|Limit)/"
+ },
+ "properties": [
+ {
+ "id": "custom.lineStyle",
+ "value": {
+ "dash": [
+ 0,
+ 10
+ ],
+ "fill": "dot"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 5
+ },
+ {
+ "id": "custom.drawStyle",
+ "value": "line"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Requested"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Limit"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "orange",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 0,
+ "y": 6
+ },
+ "id": 25,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum by (pod) (rate(container_cpu_usage_seconds_total{ {{- include "coderd-selector" . -}} }[$__rate_interval]))",
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "max(kube_pod_container_resource_limits{ {{- include "coderd-selector" . -}} , resource=\"cpu\"})",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Limit",
+ "range": true,
+ "refId": "C"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "max(kube_pod_container_resource_requests{ {{- include "coderd-selector" . -}} , resource=\"cpu\"})",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Requested",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "CPU Usage Seconds",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 6,
+ "y": 6
+ },
+ "id": 26,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "The cumulative CPU used per core-second. If `coderd` was using a full CPU core, that would be represented as 1 second.\n\nRequests & limits are shown if set.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "fixedColor": "red",
+ "mode": "shades"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 1
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Requested"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Limit"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "red",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 12,
+ "y": 6
+ },
+ "id": 30,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "sum by (reason) (\n count_over_time(kube_pod_container_status_terminated_reason{ {{- include "coderd-selector" . -}} }[$__interval])\n)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": {{ printf "{{reason}}" | quote }},
+ "range": true,
+ "refId": "C"
+ }
+ ],
+ "title": "Terminations",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 0.0001
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 2,
+ "x": 16,
+ "y": 6
+ },
+ "id": 34,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "mean"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(increase(kube_pod_container_status_restarts_total{ {{- include "coderd-selector" . -}} }[$__range]))",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "B"
+ }
+ ],
+ "title": "Restarts",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 18,
+ "y": 6
+ },
+ "id": 31,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "Pods can be terminated for several reasons:\n- `OOMKilled`: pod exceeded its defined memory limit or was terminated by the OS for using excessive memory (if no limit defined)\n- `Error`: usually attributable to a configuration problem\n- `Evicted`: pod has been evicted from node for overusing resources and will be rescheduled on another node if possible",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 1
+ }
+ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byRegexp",
+ "options": "/(Requested|Limit)/"
+ },
+ "properties": [
+ {
+ "id": "custom.lineStyle",
+ "value": {
+ "dash": [
+ 0,
+ 10
+ ],
+ "fill": "dot"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 5
+ },
+ {
+ "id": "custom.drawStyle",
+ "value": "line"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Requested"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Limit"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "orange",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 0,
+ "y": 12
+ },
+ "id": 29,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max by (pod) (container_memory_working_set_bytes{ {{- include "coderd-selector" . -}} })",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "max(kube_pod_container_resource_limits{ {{- include "coderd-selector" . -}} , resource=\"memory\"})",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Limit",
+ "range": true,
+ "refId": "C"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "max(kube_pod_container_resource_requests{ {{- include "coderd-selector" . -}} , resource=\"memory\"})",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Requested",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "RAM Usage",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 6,
+ "y": 12
+ },
+ "id": 28,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "This shows the total memory used by each `coderd` container; it is the same metric which the [OOM killer](https://www.kernel.org/doc/gorman/html/understand/understand016.html) uses.\n\nRequests & limits are shown if set.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "orange",
+ "value": 0.1
+ },
+ {
+ "color": "red",
+ "value": 0.5
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Errors"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "short"
+ },
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 1
+ }
+ ]
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 4,
+ "x": 12,
+ "y": 12
+ },
+ "id": 16,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "mean"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "quantile(0.5, coder_pubsub_send_latency_seconds)",
+ "instant": false,
+ "legendFormat": "Send",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "quantile(0.5, coder_pubsub_receive_latency_seconds)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Receive",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Pubsub Latency (Median)",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Errors"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "short"
+ },
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 1
+ }
+ ]
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 2,
+ "x": 16,
+ "y": 12
+ },
+ "id": 22,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "mean"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "(\n sum(increase(coder_pubsub_latency_measure_errs_total[$__range]))\n / count(coder_pubsub_latency_measure_errs_total)\n) or vector(0)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Errors",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Pubsub Errors",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 18,
+ "y": 12
+ },
+ "id": 19,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "`coderd` uses Postgres for passing messages between subcomponents for coordination and signalling;\nthis is called \"pubsub\" (or publish-subscribe).\n\nWe measure the time for messages to be sent and received. Latencies higher than 500ms will likely lead to\nyour Coder deployment feeling sluggish. High latency is usually an indication that your Postgres server is under-resourced on CPU.\n\nHigh values for median should be concerning,\nwhile the 90th percentile shows the outliers.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "orange",
+ "value": 0.1
+ },
+ {
+ "color": "red",
+ "value": 0.5
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Errors"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "short"
+ },
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 1
+ }
+ ]
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 4,
+ "x": 12,
+ "y": 15
+ },
+ "id": 21,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "mean"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "quantile(0.9, coder_pubsub_send_latency_seconds)",
+ "instant": false,
+ "legendFormat": "Send",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "quantile(0.9, coder_pubsub_receive_latency_seconds)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Receive",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Pubsub Latency (P90)",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 0,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 1
+ }
+ ]
+ },
+ "unit": "reqps"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 0,
+ "y": 18
+ },
+ "id": 35,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum by(pod) (rate(coderd_api_requests_processed_total{ {{- include "coderd-selector" . -}} }[$__rate_interval]))",
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "API Requests",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 6,
+ "y": 18
+ },
+ "id": 36,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "This shows the number of requests per second each `coderd` replica is handling.\n\nHeavy skewing towards a single `coderd` replica indicates faulty loadbalancing.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ }
+ ],
+ "refresh": "{{- include "dashboard-refresh" . -}}",
+ "schemaVersion": 39,
+ "tags": [],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-{{- include "dashboard-range" . -}}",
+ "to": "now"
+ },
+ "timepicker": {},
+ "timezone": "browser",
+ "title": "Control Plane",
+ "uid": "coderd",
+ "version": 6,
+ "weekStart": ""
+}
+{{ end }}
\ No newline at end of file
diff --git a/coder-observability/templates/dashboards/_dashboards_prebuilds.json.tpl b/coder-observability/templates/dashboards/_dashboards_prebuilds.json.tpl
new file mode 100644
index 0000000..938b501
--- /dev/null
+++ b/coder-observability/templates/dashboards/_dashboards_prebuilds.json.tpl
@@ -0,0 +1,1050 @@
+{{ define "prebuilds-dashboard.json" }}
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": {
+ "type": "grafana",
+ "uid": "-- Grafana --"
+ },
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 0,
+ "id": 10,
+ "links": [],
+ "panels": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "orange",
+ "index": 2,
+ "text": "Not enabled"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Enabled"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 1,
+ "text": "Not enabled"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 0,
+ "y": 0
+ },
+ "id": 15,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "text": {
+ "valueSize": 15
+ },
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.3",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "min(coderd_experiments{experiment=\"workspace-prebuilds\"})",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Experiment enabled?",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "fixedColor": "text",
+ "mode": "fixed"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 4,
+ "y": 0
+ },
+ "id": 49,
+ "interval": "30s",
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "vertical",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.3",
+ "repeatDirection": "v",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(max(coderd_prebuilt_workspaces_desired) by (template_name, preset_name)) or vector(0)",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Desired",
+ "range": false,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(max(coderd_prebuilt_workspaces_running) by (template_name, preset_name)) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Running",
+ "range": false,
+ "refId": "D"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(max(coderd_prebuilt_workspaces_eligible) by (template_name, preset_name)) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Eligible",
+ "range": false,
+ "refId": "E"
+ }
+ ],
+ "title": "Current: Global",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "fixedColor": "text",
+ "mode": "fixed"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 8,
+ "y": 0
+ },
+ "id": 48,
+ "interval": "30s",
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "vertical",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.3",
+ "repeatDirection": "v",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(max by (template_name, preset_name) (coderd_prebuilt_workspaces_created_total)) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Created",
+ "range": false,
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(max by (template_name, preset_name) (coderd_prebuilt_workspaces_failed_total)) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Failed",
+ "range": false,
+ "refId": "C"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(max by (template_name, preset_name) (coderd_prebuilt_workspaces_claimed_total)) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Claimed",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "All Time: Global",
+ "type": "stat"
+ },
+ {
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 4
+ },
+ "id": 2,
+ "panels": [],
+ "repeat": "template",
+ "repeatDirection": "h",
+ "title": "$template",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "fixedColor": "text",
+ "mode": "fixed"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 0,
+ "y": 5
+ },
+ "id": 31,
+ "interval": "30s",
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "vertical",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.3",
+ "repeat": "preset",
+ "repeatDirection": "v",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(coderd_prebuilt_workspaces_desired{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Desired",
+ "range": false,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(coderd_prebuilt_workspaces_running{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Running",
+ "range": false,
+ "refId": "D"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(coderd_prebuilt_workspaces_eligible{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Eligible",
+ "range": false,
+ "refId": "E"
+ }
+ ],
+ "title": "Current: $preset",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMax": 10,
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 18,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "smooth",
+ "lineStyle": {
+ "fill": "solid"
+ },
+ "lineWidth": 2,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "decimals": 0,
+ "fieldMinMax": false,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Desired"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "purple",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "custom.lineStyle",
+ "value": {
+ "dash": [
+ 10,
+ 10
+ ],
+ "fill": "dash"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 85
+ },
+ {
+ "id": "custom.fillBelowTo",
+ "value": "Running"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Running"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "yellow",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "custom.fillBelowTo",
+ "value": "Eligible"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Eligible"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 4,
+ "y": 5
+ },
+ "id": 5,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.4.3",
+ "repeat": "preset",
+ "repeatDirection": "v",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "max(coderd_prebuilt_workspaces_desired{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)",
+ "instant": false,
+ "interval": "",
+ "legendFormat": "Desired",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "max(coderd_prebuilt_workspaces_running{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "legendFormat": "Running",
+ "range": true,
+ "refId": "D"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "max(coderd_prebuilt_workspaces_eligible{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "legendFormat": "Eligible",
+ "range": true,
+ "refId": "E"
+ }
+ ],
+ "title": "Pool Capacity: $preset",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMax": 10,
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 13,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "smooth",
+ "lineStyle": {
+ "fill": "solid"
+ },
+ "lineWidth": 2,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "decimals": 0,
+ "fieldMinMax": false,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Failed"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "red",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Created"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "blue",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Desired"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "purple",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Running"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "yellow",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Eligible"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Claimed"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "dark-green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 12,
+ "y": 5
+ },
+ "id": 38,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.4.3",
+ "repeat": "preset",
+ "repeatDirection": "v",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "floor(max(increase(coderd_prebuilt_workspaces_created_total{template_name=~\"$template\", preset_name=~\"$preset\"}[$__rate_interval]))) or vector(0)",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "legendFormat": "Created",
+ "range": true,
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "floor(max(increase(coderd_prebuilt_workspaces_failed_total{template_name=~\"$template\", preset_name=~\"$preset\"}[$__rate_interval]))) or vector(0)",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "legendFormat": "Failed",
+ "range": true,
+ "refId": "C"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "floor(max(increase(coderd_prebuilt_workspaces_claimed_total{template_name=~\"$template\", preset_name=~\"$preset\"}[$__rate_interval]))) or vector(0)",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "legendFormat": "Claimed",
+ "range": true,
+ "refId": "F"
+ }
+ ],
+ "title": "Pool Operations: $preset",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "fixedColor": "text",
+ "mode": "fixed"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 20,
+ "y": 5
+ },
+ "id": 1,
+ "interval": "30s",
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "vertical",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.3",
+ "repeat": "preset",
+ "repeatDirection": "v",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(coderd_prebuilt_workspaces_created_total{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Created",
+ "range": false,
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(coderd_prebuilt_workspaces_failed_total{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Failed",
+ "range": false,
+ "refId": "C"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(coderd_prebuilt_workspaces_claimed_total{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Claimed",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "All Time: $preset",
+ "type": "stat"
+ }
+ ],
+ "refresh": "{{- include "dashboard-refresh" . -}}",
+ "schemaVersion": 39,
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "allValue": "",
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "definition": "label_values(coderd_prebuilt_workspaces_desired,template_name)",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Template",
+ "multi": false,
+ "name": "template",
+ "options": [],
+ "query": {
+ "qryType": 1,
+ "query": "label_values(coderd_prebuilt_workspaces_desired,template_name)",
+ "refId": "PrometheusVariableQueryEditor-VariableQuery"
+ },
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 0,
+ "type": "query"
+ },
+ {
+ "allValue": "",
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "definition": "label_values(coderd_prebuilt_workspaces_desired{template_name=~\"$template\"},preset_name)",
+ "hide": 0,
+ "includeAll": true,
+ "label": "Preset",
+ "multi": true,
+ "name": "preset",
+ "options": [],
+ "query": {
+ "qryType": 1,
+ "query": "label_values(coderd_prebuilt_workspaces_desired{template_name=~\"$template\"},preset_name)",
+ "refId": "PrometheusVariableQueryEditor-VariableQuery"
+ },
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 0,
+ "type": "query"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-{{- include "dashboard-range" . -}}",
+ "to": "now"
+ },
+ "timepicker": {},
+ "timezone": "browser",
+ "title": "Prebuilds",
+ "uid": "cej6jysyme22oa",
+ "version": 13,
+ "weekStart": ""
+}
+{{ end }}
\ No newline at end of file
diff --git a/coder-observability/templates/dashboards/_dashboards_provisionerd.json.tpl b/coder-observability/templates/dashboards/_dashboards_provisionerd.json.tpl
new file mode 100644
index 0000000..9b855a5
--- /dev/null
+++ b/coder-observability/templates/dashboards/_dashboards_provisionerd.json.tpl
@@ -0,0 +1,1021 @@
+{{ define "provisionerd-dashboard.json" }}
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": {
+ "type": "grafana",
+ "uid": "-- Grafana --"
+ },
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "target": {
+ "limit": 100,
+ "matchAny": false,
+ "tags": [],
+ "type": "dashboard"
+ },
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 0,
+ "links": [],
+ "panels": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 1
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 0,
+ "y": 0
+ },
+ "id": 17,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "value_and_name",
+ "wideLayout": false
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(coderd_provisionerd_num_daemons{pod=~`coder.*`, pod!~`.*provisioner.*`})",
+ "instant": true,
+ "legendFormat": "Built-in",
+ "range": false,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(coderd_provisionerd_num_daemons{ {{- include "provisionerd-selector" . -}} })",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "External",
+ "range": false,
+ "refId": "B"
+ }
+ ],
+ "title": "Provisioners",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 6,
+ "y": 0
+ },
+ "id": 20,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "Provisioners are responsible for building workspaces.\n\n`coderd` runs built-in provisioners by default. Control this with the `CODER_PROVISIONER_DAEMONS` environment variable or `--provisioner-daemons` flag.\n\nYou can also consider [External Provisioners](https://coder.com/docs/v2/latest/admin/provisioners). Running both built-in and external provisioners is perfectly valid,\nalthough dedicated (external) provisioners will generally give the best build performance.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 1
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 12,
+ "y": 0
+ },
+ "id": 21,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "(sum(coderd_provisionerd_jobs_current) > 0) or vector(0)",
+ "instant": false,
+ "legendFormat": "Current",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(coderd_provisionerd_num_daemons)",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Capacity",
+ "range": false,
+ "refId": "B"
+ }
+ ],
+ "title": "Builds",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 18,
+ "y": 0
+ },
+ "id": 22,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "The maximum number of simultaneous builds is equivalent to the number of `provisionerd` daemons running.\n\nThe \"Capacity\" panel shows how many simultaneous builds are possible.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "fieldMinMax": false,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 0,
+ "y": 7
+ },
+ "id": 23,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "histogram_quantile(0.5, sum by(le) (rate(coderd_provisionerd_job_timings_seconds_bucket[$__range])))",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Median",
+ "range": false,
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "histogram_quantile(0.9, sum by(le) (rate(coderd_provisionerd_job_timings_seconds_bucket[$__range])))",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "90th Percentile",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Build Times",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 6,
+ "y": 7
+ },
+ "id": 24,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "This shows the median and 90th percentile workspace build times.\n\nLong build times can impede developers' productivity while they wait for workspaces to start or be created.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "normal"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "decimals": 0,
+ "fieldMinMax": false,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "failed"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "orange",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "displayName",
+ "value": "Failure"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "success"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "displayName",
+ "value": "Success"
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 12,
+ "y": 7
+ },
+ "id": 25,
+ "interval": "1h",
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum by (status) (increase(coderd_provisionerd_job_timings_seconds_count[$__interval]))",
+ "hide": false,
+ "instant": false,
+ "interval": "1h",
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Build Count Per Hour",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 18,
+ "y": 7
+ },
+ "id": 26,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "_NOTE: this will not show the current hour._",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "fieldMinMax": false,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byRegexp",
+ "options": "/(Limit|Requested)/"
+ },
+ "properties": [
+ {
+ "id": "custom.drawStyle",
+ "value": "line"
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 5
+ },
+ {
+ "id": "custom.lineStyle",
+ "value": {
+ "dash": [
+ 0,
+ 10
+ ],
+ "fill": "dot"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Limit"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "orange",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Requested"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 0,
+ "y": 14
+ },
+ "id": 28,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum by (pod) (rate(container_cpu_usage_seconds_total{ {{- include "provisionerd-selector" . -}} }[$__rate_interval]))",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(kube_pod_container_resource_limits{ {{- include "provisionerd-selector" . -}} , resource=\"cpu\"})",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Limit",
+ "range": true,
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(kube_pod_container_resource_requests{ {{- include "provisionerd-selector" . -}} , resource=\"cpu\"})",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Requested",
+ "range": true,
+ "refId": "C"
+ }
+ ],
+ "title": "CPU Usage Seconds",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 6,
+ "y": 14
+ },
+ "id": 30,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "The CPU time consumed per second, measured in core-seconds. If a process were using a full CPU core, that would be represented as 1 second.\n\nRequests & limits are shown if set.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "fieldMinMax": false,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ }
+ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byRegexp",
+ "options": "/(Limit|Requested)/"
+ },
+ "properties": [
+ {
+ "id": "custom.drawStyle",
+ "value": "line"
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 5
+ },
+ {
+ "id": "custom.lineStyle",
+ "value": {
+ "dash": [
+ 0,
+ 10
+ ],
+ "fill": "dot"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Limit"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "orange",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Requested"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 12,
+ "y": 14
+ },
+ "id": 29,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max by (pod) (container_memory_working_set_bytes{ {{- include "provisionerd-selector" . -}} })",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(kube_pod_container_resource_limits{ {{- include "provisionerd-selector" . -}} , resource=\"memory\"})",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Limit",
+ "range": true,
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(kube_pod_container_resource_requests{ {{- include "provisionerd-selector" . -}} , resource=\"memory\"})",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Requested",
+ "range": true,
+ "refId": "C"
+ }
+ ],
+ "title": "RAM Usage",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 18,
+ "y": 14
+ },
+ "id": 31,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "This shows the total memory used by each container; it is the same metric which the [OOM killer](https://www.kernel.org/doc/gorman/html/understand/understand016.html) uses.\n\nRequests & limits are shown if set.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "loki",
+ "uid": "loki"
+ },
+ "gridPos": {
+ "h": 18,
+ "w": 18,
+ "x": 0,
+ "y": 21
+ },
+ "id": 27,
+ "options": {
+ "dedupStrategy": "exact",
+ "enableLogDetails": true,
+ "prettifyLogMessage": false,
+ "showCommonLabels": false,
+ "showLabels": false,
+ "showTime": true,
+ "sortOrder": "Descending",
+ "wrapLogMessage": false
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "loki",
+ "uid": "loki"
+ },
+ "editorMode": "code",
+ "expr": "{ {{- include "non-workspace-selector" . -}}, logger=~\"(.*runner|terraform|provisioner.*)\"}",
+ "queryType": "range",
+ "refId": "A"
+ }
+ ],
+ "title": "Logs",
+ "type": "logs"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 18,
+ "y": 21
+ },
+ "id": 32,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "This panel shows all logs across built-in and [external provisioners](https://coder.com/docs/v2/latest/admin/provisioners).",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ }
+ ],
+ "refresh": "{{- include "dashboard-refresh" . -}}",
+ "schemaVersion": 39,
+ "tags": [],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-{{- include "dashboard-range" . -}}",
+ "to": "now"
+ },
+ "timepicker": {},
+ "timezone": "browser",
+ "title": "Provisioners",
+ "uid": "provisionerd",
+ "version": 10,
+ "weekStart": ""
+}
+{{ end }}
\ No newline at end of file
diff --git a/coder-observability/templates/dashboards/_dashboards_status.json.tpl b/coder-observability/templates/dashboards/_dashboards_status.json.tpl
new file mode 100644
index 0000000..6a96f7e
--- /dev/null
+++ b/coder-observability/templates/dashboards/_dashboards_status.json.tpl
@@ -0,0 +1,2076 @@
+{{ define "status-dashboard.json" }}
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": {
+ "type": "grafana",
+ "uid": "-- Grafana --"
+ },
+ "enable": false,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "target": {
+ "limit": 100,
+ "matchAny": false,
+ "tags": [],
+ "type": "dashboard"
+ },
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 0,
+ "links": [],
+ "panels": [
+ {
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 9,
+ "title": "Application",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 1
+ }
+ ]
+ }
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Down"
+ },
+ "properties": [
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 1
+ }
+ ]
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 0,
+ "y": 1
+ },
+ "id": 10,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "value_and_name",
+ "wideLayout": false
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "count(up{ {{- include "coderd-selector" . -}} } == 1) or vector(0)",
+ "instant": true,
+ "legendFormat": "Up",
+ "range": false,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "count(up{ {{- include "coderd-selector" . -}} } == 0) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Down",
+ "range": false,
+ "refId": "B"
+ }
+ ],
+ "title": "Coder Replicas",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 1
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 4,
+ "y": 1
+ },
+ "id": 16,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "value_and_name",
+ "wideLayout": false
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(coderd_provisionerd_num_daemons{ {{- include "coderd-selector" . -}} })",
+ "instant": true,
+ "legendFormat": "Built-in",
+ "range": false,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(coderd_provisionerd_num_daemons{ {{- include "provisionerd-selector" . -}} })",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "External",
+ "range": false,
+ "refId": "B"
+ }
+ ],
+ "title": "Provisioners",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ }
+ },
+ "mappings": []
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "failed"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "orange",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "displayName",
+ "value": "Failed"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "success"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "displayName",
+ "value": "Success"
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 8,
+ "y": 1
+ },
+ "id": 17,
+ "options": {
+ "displayLabels": [
+ "name",
+ "value"
+ ],
+ "legend": {
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true,
+ "values": [
+ "percent"
+ ]
+ },
+ "pieType": "pie",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "round(sum by (status) (increase(coderd_provisionerd_job_timings_seconds_count{pod!=``}[$__range])))",
+ "instant": true,
+ "legendFormat": {{ printf "{{status}}" | quote }},
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Workspace Builds",
+ "type": "piechart"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 1
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 12,
+ "y": 1
+ },
+ "id": 18,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "count(kube_pod_status_ready{condition=\"true\", {{ include "workspaces-selector" . -}}} == 1)\nor\ncount(coderd_api_workspace_latest_build{status=\"running\"})\nor\nvector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Running Workspaces",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "decimals": 0,
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Down"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Up"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byRegexp",
+ "options": "/.*RAM/"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 16,
+ "y": 1
+ },
+ "id": 15,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(\n max_over_time(\n rate(container_cpu_usage_seconds_total{ {{- include "coderd-selector" . -}} }[1h:1m])\n [$__range:]\n )\n)",
+ "instant": true,
+ "legendFormat": "Control Plane CPU",
+ "range": false,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(\n max_over_time(\n rate(container_cpu_usage_seconds_total{ {{- include "provisionerd-selector" . -}} }[1h:1m])\n [$__range:]\n )\n)",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Provisioner CPU",
+ "range": false,
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(\n max_over_time(\n container_memory_working_set_bytes{ {{- include "coderd-selector" . -}} }\n [$__range:]\n )\n)",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Control Plane RAM",
+ "range": false,
+ "refId": "C"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(\n max_over_time(\n container_memory_working_set_bytes{ {{- include "provisionerd-selector" . -}} }\n [$__range:]\n )\n)",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Provisioner RAM",
+ "range": false,
+ "refId": "D"
+ }
+ ],
+ "title": "Resource Usage High Watermark (Cumulative)",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Down"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Up"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 20,
+ "y": 1
+ },
+ "id": 19,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "min(pg_up) or vector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Postgres",
+ "type": "stat"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 8
+ },
+ "id": 3,
+ "panels": [],
+ "title": "Observability Tools",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Down"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Up"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 0,
+ "y": 9
+ },
+ "id": 1,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "min(up{job=\"{{- include "prometheus-job" . -}}\"}) or vector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Prometheus",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Down"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Up"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 4,
+ "y": 9
+ },
+ "id": 4,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "min(up{job=\"{{- include "loki-job" . -}}/write\"}) or vector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Loki Write Path",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Down"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Up"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 8,
+ "y": 9
+ },
+ "id": 5,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "min(up{job=\"{{- include "loki-job" . -}}/read\"}) or vector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Loki Read Path",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Down"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Up"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 12,
+ "y": 9
+ },
+ "id": 6,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "min(up{job=\"{{- include "loki-job" . -}}/backend\", container=\"loki\"}) or vector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Loki Backend",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Down"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Up"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 16,
+ "y": 9
+ },
+ "id": 7,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "min(up{job=\"{{- include "loki-job" . -}}/canary\"}) or vector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Loki Canary",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Down"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Up"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 20,
+ "y": 9
+ },
+ "id": 8,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "min(up{job=\"{{- include "grafana-agent-job" . -}}\"}) or vector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Grafana Agent",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Unhealthy"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Healthy"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 0,
+ "y": 14
+ },
+ "id": 12,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "min(prometheus_config_last_reload_successful{job=\"{{- include "prometheus-job" . -}}\"}) or vector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Prometheus Config",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Unhealthy"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Healthy"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 4,
+ "y": 14
+ },
+ "id": 14,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "min(loki_runtime_config_last_reload_successful) or vector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Loki Config",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Unhealthy"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Healthy"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 8,
+ "y": 14
+ },
+ "id": 13,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "min(agent_config_last_load_successful{job=\"{{- include "grafana-agent-job" . -}}\"}) or vector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Grafana Agent Config",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 0.8
+ }
+ ]
+ },
+ "unit": "percentunit"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Retention Limit"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "red",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Write-Ahead Log"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "purple",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Storage"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#f9f9fb",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 12,
+ "y": 14
+ },
+ "id": 11,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "(\n prometheus_tsdb_wal_storage_size_bytes{job=\"{{- include "prometheus-job" . -}}\"} +\n prometheus_tsdb_storage_blocks_bytes{job=\"{{- include "prometheus-job" . -}}\"} +\n prometheus_tsdb_symbol_table_size_bytes{job=\"{{- include "prometheus-job" . -}}\"}\n)\n/\n(prometheus_tsdb_retention_limit_bytes{job=\"{{- include "prometheus-job" . -}}\"} > 0)",
+ "instant": false,
+ "legendFormat": "Retention limit used",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Prometheus Storage",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ }
+ ]
+ },
+ "unit": "none"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 16,
+ "y": 14
+ },
+ "id": 20,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "text": {
+ "titleSize": 20,
+ "valueSize": 35
+ },
+ "textMode": "auto",
+ "wideLayout": false
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(kube_pod_container_resource_requests{namespace=\"{{- .Release.Namespace -}}\", resource=\"cpu\"})",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Requested",
+ "range": false,
+ "refId": "C"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(\n max_over_time(\n rate(container_cpu_usage_seconds_total{namespace=\"{{- .Release.Namespace -}}\"}[$__rate_interval])\n [$__range:]\n )\n)",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "High Watermark",
+ "range": false,
+ "refId": "D"
+ }
+ ],
+ "title": "CPU",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ }
+ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 20,
+ "y": 14
+ },
+ "id": 21,
+ "options": {
+ "colorMode": "none",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "vertical",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "text": {
+ "titleSize": 20,
+ "valueSize": 35
+ },
+ "textMode": "value_and_name",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(kube_pod_container_resource_requests{namespace=\"{{- .Release.Namespace -}}\", resource=\"memory\"})",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Requested",
+ "range": false,
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(\n max_over_time(container_memory_working_set_bytes{namespace=\"{{- .Release.Namespace -}}\"}[$__range])\n)",
+ "instant": true,
+ "legendFormat": "High Watermark",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "RAM",
+ "type": "stat"
+ }
+ ],
+ "refresh": "30s",
+ "schemaVersion": 39,
+ "tags": [],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-24h",
+ "to": "now"
+ },
+ "timepicker": {},
+ "timezone": "browser",
+ "title": "Status",
+ "uid": "coder-status",
+ "version": 1,
+ "weekStart": ""
+}
+{{ end }}
\ No newline at end of file
diff --git a/coder-observability/templates/dashboards/_dashboards_workspace_detail.json.tpl b/coder-observability/templates/dashboards/_dashboards_workspace_detail.json.tpl
new file mode 100644
index 0000000..713cc9a
--- /dev/null
+++ b/coder-observability/templates/dashboards/_dashboards_workspace_detail.json.tpl
@@ -0,0 +1,1344 @@
+{{ define "workspace-detail-dashboard.json" }}
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": {
+ "type": "grafana",
+ "uid": "-- Grafana --"
+ },
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 0,
+ "links": [],
+ "panels": [
+ {
+ "datasource": {
+ "type": "loki",
+ "uid": "loki"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 1.2,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 28,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "**HINT**: use the dropdown above to filter by a specific workspace.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "blue",
+ "value": null
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "CPUs Requested"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "none"
+ },
+ {
+ "id": "decimals",
+ "value": 2
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "RAM Requested"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "PVC Capacity"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 20,
+ "x": 0,
+ "y": 1.2
+ },
+ "id": 29,
+ "options": {
+ "colorMode": "none",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "vertical",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "/.*/",
+ "values": false
+ },
+ "showPercentChange": false,
+ "text": {
+ "titleSize": 20,
+ "valueSize": 40
+ },
+ "textMode": "value_and_name",
+ "wideLayout": false
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "group by (template_name) (coderd_agents_up{workspace_name=~\"$workspace_name\"})",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Template Name",
+ "range": false,
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "group by (template_version) (coderd_agents_up{workspace_name=~\"$workspace_name\"})",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Template Version",
+ "range": false,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "group by (username) (coderd_agents_up{workspace_name=~\"$workspace_name\"})",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Owner",
+ "range": false,
+ "refId": "C"
+ }
+ ],
+ "title": "Details",
+ "transformations": [
+ {
+ "id": "concatenate",
+ "options": {}
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true,
+ "Value #A": true,
+ "Value #B": true,
+ "Value #C": true,
+ "Value #D": true
+ },
+ "includeByName": {},
+ "indexByName": {
+ "CPUs Requested": 7,
+ "PVC Capacity": 9,
+ "RAM Requested": 8,
+ "Time": 0,
+ "Value #A": 5,
+ "Value #B": 3,
+ "Value #C": 6,
+ "template_name": 2,
+ "template_version": 4,
+ "username": 1
+ },
+ "renameByName": {
+ "Value #C": "",
+ "lifecycle_state": "Agent State",
+ "template_name": "Template",
+ "template_version": "Template Version",
+ "username": "Owner"
+ }
+ }
+ }
+ ],
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 8,
+ "w": 4,
+ "x": 20,
+ "y": 1.2
+ },
+ "id": 38,
+ "links": [
+ {
+ "title": "Provisioners Dashboard",
+ "url": "/d/provisionerd/provisioners?${__url_time_range}"
+ }
+ ],
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "Essential information about the selected workspace.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "blue",
+ "value": null
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "CPUs Requested"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "none"
+ },
+ {
+ "id": "decimals",
+ "value": 2
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "RAM Requested"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "PVC Capacity"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 20,
+ "x": 0,
+ "y": 5.2
+ },
+ "id": 36,
+ "options": {
+ "reduceOptions": {
+ "values": false,
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "/.*/"
+ },
+ "orientation": "vertical",
+ "textMode": "value_and_name",
+ "wideLayout": false,
+ "colorMode": "none",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "showPercentChange": false,
+ "text": {
+ "titleSize": 20,
+ "valueSize": 40
+ }
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(kube_pod_container_resource_requests{pod=~\".*$workspace_name.*\", {{ include "workspaces-selector" . -}}, resource=\"cpu\"})",
+ "format": "time_series",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "CPUs Requested",
+ "range": false,
+ "refId": "D"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(kube_pod_container_resource_requests{pod=~\".*$workspace_name.*\", {{ include "workspaces-selector" . -}}, resource=\"memory\"})",
+ "format": "time_series",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "RAM Requested",
+ "range": false,
+ "refId": "E"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(\n kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~\".*$workspace_name.*\", {{- include "workspaces-selector" . -}} }\n * on(persistentvolumeclaim) group_right\n group by (persistentvolumeclaim, persistentvolume) (\n label_replace(\n kube_persistentvolume_claim_ref,\n \"persistentvolumeclaim\",\n \"$1\",\n \"name\",\n \"(.+)\"\n )\n )\n * on (persistentvolume)\n kube_persistentvolume_capacity_bytes\n)",
+ "format": "time_series",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "PVC Capacity",
+ "range": false,
+ "refId": "F"
+ }
+ ],
+ "title": "Resources",
+ "transformations": [
+ {
+ "id": "concatenate",
+ "options": {}
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true,
+ "Value #A": true,
+ "Value #B": true,
+ "Value #C": true,
+ "Value #D": true
+ },
+ "includeByName": {},
+ "indexByName": {
+ "CPUs Requested": 7,
+ "PVC Capacity": 9,
+ "RAM Requested": 8,
+ "Time": 0,
+ "Value #A": 5,
+ "Value #B": 3,
+ "Value #C": 6,
+ "template_name": 2,
+ "template_version": 4,
+ "username": 1
+ },
+ "renameByName": {
+ "Value #C": "",
+ "lifecycle_state": "Agent State",
+ "template_name": "Template",
+ "template_version": "Template Version",
+ "username": "Owner"
+ }
+ }
+ }
+ ],
+ "type": "stat",
+ "description": ""
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "mappings": [
+ {
+ "options": {
+ "created": {
+ "color": "light-blue",
+ "index": 1,
+ "text": "Created"
+ },
+ "off": {
+ "color": "text",
+ "index": 8,
+ "text": "Off"
+ },
+ "ready": {
+ "color": "green",
+ "index": 0,
+ "text": "Ready"
+ },
+ "shutdown_error": {
+ "color": "red",
+ "index": 7,
+ "text": "Shutdown Error"
+ },
+ "shutdown_timeout": {
+ "color": "purple",
+ "index": 6,
+ "text": "Shutdown Timeout"
+ },
+ "shutting_down": {
+ "color": "light-purple",
+ "index": 5,
+ "text": "Shutting Down"
+ },
+ "start_error": {
+ "color": "red",
+ "index": 4,
+ "text": "Start Error"
+ },
+ "start_timeout": {
+ "color": "orange",
+ "index": 3,
+ "text": "Start Timeout"
+ },
+ "starting": {
+ "color": "super-light-green",
+ "index": 2,
+ "text": "Starting"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "text",
+ "index": 9,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "text",
+ "index": 10,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 0,
+ "y": 9.2
+ },
+ "id": 35,
+ "options": {
+ "colorMode": "background",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "horizontal",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "/^lifecycle_state$/",
+ "values": false
+ },
+ "showPercentChange": false,
+ "text": {
+ "valueSize": 50
+ },
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max by (lifecycle_state) (coderd_agents_connections{workspace_name=~\"$workspace_name\"})",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "D"
+ }
+ ],
+ "title": "Agent Lifecycle State",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "mappings": [
+ {
+ "options": {
+ "-1": {
+ "color": "light-orange",
+ "index": 0,
+ "text": "Not completed yet"
+ }
+ },
+ "type": "value"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "#EAB839",
+ "value": 60
+ },
+ {
+ "color": "red",
+ "value": 120
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 3,
+ "x": 4,
+ "y": 9.2
+ },
+ "id": 33,
+ "options": {
+ "colorMode": "background",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "horizontal",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "/^Value$/",
+ "values": false
+ },
+ "showPercentChange": false,
+ "text": {
+ "valueSize": 50
+ },
+ "textMode": "value",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(coderd_agentstats_startup_script_seconds{workspace_name=~\"$workspace_name\"}) or vector(-1)",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "C"
+ }
+ ],
+ "title": "Agent Startup Script Execution Time",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 3,
+ "x": 7,
+ "y": 9.2
+ },
+ "id": 39,
+ "options": {
+ "colorMode": "background",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "horizontal",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "/.*/",
+ "values": false
+ },
+ "showPercentChange": false,
+ "text": {
+ "titleSize": 20,
+ "valueSize": 50
+ },
+ "textMode": "value_and_name",
+ "wideLayout": false
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max by (app) (\n label_replace(\n {workspace_name=~\"$workspace_name\", __name__=~\"coderd_agentstats_session_count_.*\"},\n \"app\",\n \"$1\",\n \"__name__\",\n \"coderd_agentstats_session_count_(.*)\"\n )\n)>0",
+ "format": "time_series",
+ "hide": false,
+ "instant": true,
+ "legendFormat": {{ printf "{{app}}" | quote }},
+ "range": false,
+ "refId": "C"
+ }
+ ],
+ "title": "App Session Counts",
+ "transformations": [
+ {
+ "id": "concatenate",
+ "options": {}
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true
+ },
+ "includeByName": {},
+ "indexByName": {},
+ "renameByName": {}
+ }
+ }
+ ],
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byRegexp",
+ "options": "/.*Bytes/"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 10,
+ "x": 10,
+ "y": 9.2
+ },
+ "id": 34,
+ "options": {
+ "colorMode": "none",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "vertical",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "/.*/",
+ "values": false
+ },
+ "showPercentChange": false,
+ "text": {
+ "titleSize": 20,
+ "valueSize": 50
+ },
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(coderd_agents_connection_latencies_seconds{workspace_name=~\"$workspace_name\"})",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Connection Latency",
+ "range": false,
+ "refId": "C"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(sum by (pod) (sum_over_time(coderd_agentstats_rx_bytes{workspace_name=~\"$workspace_name\"}[$__range])))",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Received Bytes",
+ "range": false,
+ "refId": "rx"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(sum by (pod) (sum_over_time(coderd_agentstats_tx_bytes{workspace_name=~\"$workspace_name\"}[$__range])))",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Transmitted Bytes",
+ "range": false,
+ "refId": "tx"
+ }
+ ],
+ "title": "Networking",
+ "transformations": [
+ {
+ "id": "merge",
+ "options": {}
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true
+ },
+ "includeByName": {},
+ "indexByName": {},
+ "renameByName": {
+ "Value #A": "Received Bytes",
+ "Value #B": "Transmitted Bytes",
+ "Value #C": "Connection Latency",
+ "Value #rx": "Received Bytes",
+ "Value #tx": "Transmitted Bytes"
+ }
+ }
+ }
+ ],
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 20,
+ "y": 9.2
+ },
+ "id": 40,
+ "links": [
+ {
+ "title": "Provisioners Dashboard",
+ "url": "/d/provisionerd/provisioners?${__url_time_range}"
+ }
+ ],
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "Essential information about this workspace's agent.\n\nRead more about the agent [here](https://coder.com/docs/v2/latest/about/architecture#agents).",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "custom": {
+ "align": "auto",
+ "cellOptions": {
+ "type": "auto"
+ },
+ "filterable": true,
+ "inspect": false
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "status"
+ },
+ "properties": [
+ {
+ "id": "custom.cellOptions",
+ "value": {
+ "type": "color-text"
+ }
+ },
+ {
+ "id": "mappings",
+ "value": [
+ {
+ "options": {
+ "failed": {
+ "color": "orange",
+ "index": 1,
+ "text": "Failure"
+ },
+ "success": {
+ "color": "green",
+ "index": 0,
+ "text": "Success"
+ }
+ },
+ "type": "value"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Workspace Transition"
+ },
+ "properties": [
+ {
+ "id": "custom.cellOptions",
+ "value": {
+ "type": "color-text"
+ }
+ },
+ {
+ "id": "mappings",
+ "value": [
+ {
+ "options": {
+ "DESTROY": {
+ "color": "red",
+ "index": 0
+ },
+ "START": {
+ "color": "blue",
+ "index": 1
+ },
+ "STOP": {
+ "color": "purple",
+ "index": 2
+ }
+ },
+ "type": "value"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 20,
+ "x": 0,
+ "y": 15.2
+ },
+ "id": 6,
+ "interval": "",
+ "options": {
+ "cellHeight": "sm",
+ "footer": {
+ "countRows": false,
+ "enablePagination": true,
+ "fields": [],
+ "reducer": [
+ "sum"
+ ],
+ "show": false
+ },
+ "showHeader": true,
+ "sortBy": [
+ {
+ "desc": true,
+ "displayName": "Time"
+ }
+ ]
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum by (workspace_name, workspace_owner, status, template_name, template_version, workspace_transition) (\n # Since new series are created and are initially set to a value of 1, we cannot use \"increase\" (because an increase from nothing to 1 does not yield 1).\n # So we compare the current series to an interval ago to see if we have any new series and then sum the series we find. \n ((\n coderd_workspace_builds_total{workspace_name=~\"$workspace_name\"} - \n coderd_workspace_builds_total{workspace_name=~\"$workspace_name\"} offset $__interval\n ) >= 0) \n or coderd_workspace_builds_total{workspace_name=~\"$workspace_name\"}\n) > 0",
+ "format": "table",
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Build Log",
+ "transformations": [
+ {
+ "disabled": true,
+ "id": "groupBy",
+ "options": {
+ "fields": {
+ "Count": {
+ "aggregations": [
+ "sum"
+ ],
+ "operation": "aggregate"
+ },
+ "Status": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "Template Name": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "Template Version": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "Total": {
+ "aggregations": [
+ "sum"
+ ],
+ "operation": "aggregate"
+ },
+ "Value": {
+ "aggregations": [
+ "sum"
+ ],
+ "operation": "aggregate"
+ },
+ "Workspace Name": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "Workspace Owner": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "Workspace Transition": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "status": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "template_name": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "template_version": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "workspace_name": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "workspace_owner": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "workspace_transition": {
+ "aggregations": [],
+ "operation": "groupby"
+ }
+ }
+ }
+ },
+ {
+ "id": "sortBy",
+ "options": {
+ "fields": {},
+ "sort": [
+ {
+ "desc": true,
+ "field": "Value"
+ }
+ ]
+ }
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": false
+ },
+ "includeByName": {},
+ "indexByName": {},
+ "renameByName": {
+ "Value": "Count",
+ "Value (sum)": "Total",
+ "status": "Status",
+ "template_name": "Template Name",
+ "template_version": "Template Version",
+ "workspace_name": "Workspace Name",
+ "workspace_owner": "Workspace Owner",
+ "workspace_transition": "Workspace Transition"
+ }
+ }
+ }
+ ],
+ "type": "table"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 20,
+ "y": 15.2
+ },
+ "id": 37,
+ "links": [
+ {
+ "title": "Provisioners Dashboard",
+ "url": "/d/provisionerd/provisioners?${__url_time_range}"
+ }
+ ],
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "This table shows a reverse-chronological log of all workspace builds.\n\nThe \"Count\" field shows the count of events which occurred within a minute, grouped by all columns.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "loki",
+ "uid": "loki"
+ },
+ "gridPos": {
+ "h": 10,
+ "w": 20,
+ "x": 0,
+ "y": 22.2
+ },
+ "id": 7,
+ "options": {
+ "dedupStrategy": "exact",
+ "enableLogDetails": true,
+ "prettifyLogMessage": false,
+ "showCommonLabels": false,
+ "showLabels": false,
+ "showTime": true,
+ "sortOrder": "Descending",
+ "wrapLogMessage": false
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "loki",
+ "uid": "loki"
+ },
+ "editorMode": "code",
+ "expr": {{ printf "{%s, logger=~\"(.*runner|terraform|provisioner.*)\"} |~ \"$workspace_name\" | line_format `{{ printf \"[\\033[35m\" }}{{.pod}}{{ printf \"\\033[0m]\\t\" }}{{ __line__ }}`" (include "non-workspace-selector" .) | quote }},
+ "hide": false,
+ "queryType": "range",
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "loki",
+ "uid": "loki"
+ },
+ "editorMode": "code",
+ "expr": {{ printf "{%s, pod=~\".*($workspace_name).*\"} | line_format `{{ printf \"[\\033[32m\" }}{{.pod}}{{ printf \"\\033[0m]\\t\" }}{{ __line__ }}`" (include "workspaces-selector" .) | quote }},
+ "hide": false,
+ "queryType": "range",
+ "refId": "B"
+ }
+ ],
+ "title": "Logs",
+ "type": "logs"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 10,
+ "w": 4,
+ "x": 20,
+ "y": 22.2
+ },
+ "id": 24,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "The logs to the left come from both provisioners and workspaces.\n\nProvisioner logs matching the name filter are highlighted in magenta, while\nworkspace logs matching the name filter are highlighted in green.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ }
+ ],
+ "refresh": "{{- include "dashboard-refresh" . -}}",
+ "schemaVersion": 39,
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "allValue": "",
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "definition": "label_values(coderd_agents_up,workspace_name)",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Workspace Name Filter",
+ "multi": false,
+ "name": "workspace_name",
+ "options": [],
+ "query": {
+ "qryType": 1,
+ "query": "label_values(coderd_agents_up,workspace_name)",
+ "refId": "PrometheusVariableQueryEditor-VariableQuery"
+ },
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "type": "query"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-{{- include "dashboard-range" . -}}",
+ "to": "now"
+ },
+ "timepicker": {},
+ "timezone": "browser",
+ "title": "Workspace Detail",
+ "uid": "workspace-detail",
+ "version": 9,
+ "weekStart": ""
+}
+{{ end }}
\ No newline at end of file
diff --git a/coder-observability/templates/dashboards/_dashboards_workspaces.json.tpl b/coder-observability/templates/dashboards/_dashboards_workspaces.json.tpl
new file mode 100644
index 0000000..afd52d8
--- /dev/null
+++ b/coder-observability/templates/dashboards/_dashboards_workspaces.json.tpl
@@ -0,0 +1,1626 @@
+{{ define "workspaces-dashboard.json" }}
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": {
+ "type": "grafana",
+ "uid": "-- Grafana --"
+ },
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 0,
+ "links": [],
+ "panels": [
+ {
+ "datasource": {
+ "type": "loki",
+ "uid": "loki"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 1.2,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 28,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "**HINT**: use the dropdowns above to filter by specific workspaces and/or templates.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 1.2
+ },
+ "id": 31,
+ "title": "Resources",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 1,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 10,
+ "x": 0,
+ "y": 2.2
+ },
+ "id": 33,
+ "options": {
+ "legend": {
+ "calcs": [
+ "mean",
+ "stdDev",
+ "min",
+ "max",
+ "lastNotNull"
+ ],
+ "displayMode": "table",
+ "placement": "bottom",
+ "showLegend": true,
+ "sortBy": "Max",
+ "sortDesc": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "sum by (pod) (rate(container_cpu_usage_seconds_total{ {{- include "workspaces-selector" . -}} }[$__rate_interval]))",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "CPU Usage",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 1,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 10,
+ "x": 10,
+ "y": 2.2
+ },
+ "id": 37,
+ "options": {
+ "legend": {
+ "calcs": [
+ "mean",
+ "stdDev",
+ "min",
+ "max",
+ "lastNotNull"
+ ],
+ "displayMode": "table",
+ "placement": "bottom",
+ "showLegend": true,
+ "sortBy": "Max",
+ "sortDesc": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "max by (pod) (container_memory_working_set_bytes{ {{- include "workspaces-selector" . -}} })",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "RAM Usage",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 8,
+ "w": 4,
+ "x": 20,
+ "y": 2.2
+ },
+ "id": 36,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "The cumulative CPU used per core-second. If a workspace was using a full CPU core, that would be represented as 1 second.\n\nSee the Kubernetes [documentation](https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource/#cpu-units) for more details.\n\nThe total memory used by each workspace container is represented; it is the same metric which the [OOM killer](https://www.kernel.org/doc/gorman/html/understand/understand016.html) uses.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 1,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "none"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 10,
+ "x": 0,
+ "y": 10.2
+ },
+ "id": 38,
+ "options": {
+ "legend": {
+ "calcs": [
+ "sum"
+ ],
+ "displayMode": "table",
+ "placement": "bottom",
+ "showLegend": true,
+ "sortBy": "Max",
+ "sortDesc": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "sum by (pod) (\n round(increase(kube_pod_container_status_restarts_total{ {{- include "workspaces-selector" . -}} }[$__interval]))\n) > 0",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Pod Restarts",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 1,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "none"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 10,
+ "x": 10,
+ "y": 10.2
+ },
+ "id": 39,
+ "options": {
+ "legend": {
+ "calcs": [
+ "sum"
+ ],
+ "displayMode": "table",
+ "placement": "bottom",
+ "showLegend": true,
+ "sortBy": "Max",
+ "sortDesc": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "sum by (pod, reason) (\n count_over_time(kube_pod_container_status_terminated_reason{ {{- include "workspaces-selector" . -}} }[$__interval])\n)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": {{ printf "{{pod}}:{{reason}}" | quote }},
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Terminations",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 8,
+ "w": 4,
+ "x": 20,
+ "y": 10.2
+ },
+ "id": 40,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "Pods can be terminated for several reasons:\n- `OOMKilled`: pod exceeded its defined memory limit or was terminated by the OS for using excessive memory (if no limit defined)\n- `Error`: usually attributable to a configuration problem\n- `Evicted`: pod has been evicted from node for overusing resources and will be rescheduled on another node if possible\n\nPod restarts are not necessarily problematic, but they are worth noting.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 18.2
+ },
+ "id": 30,
+ "panels": [],
+ "title": "Builds",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 1,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "normal"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "DESTROY"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "red",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "STOP"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "purple",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "START"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "blue",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 10,
+ "x": 0,
+ "y": 19.2
+ },
+ "id": 2,
+ "interval": "5m",
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "sum by (workspace_transition) (\n (\n # Since new series are created and are initially set to a value of 1, we cannot use \"increase\" (because an increase from nothing to 1 does not yield 1).\n # So we compare the current series to an interval ago to see if we have any new series and then sum the series we find. \n (\n coderd_workspace_builds_total{status=\"success\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} - \n coderd_workspace_builds_total{status=\"success\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} offset $__interval\n ) >= 0) \n or coderd_workspace_builds_total{status=\"success\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"}\n) > 0",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Successful Builds by State",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "normal"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "DESTROY"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "red",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "STOP"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "purple",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "START"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "blue",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 10,
+ "x": 10,
+ "y": 19.2
+ },
+ "id": 1,
+ "interval": "5m",
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "sum by (workspace_transition) (\n (\n # Since new series are created and are initially set to a value of 1, we cannot use \"increase\" (because an increase from nothing to 1 does not yield 1).\n # So we compare the current series to an interval ago to see if we have any new series and then sum the series we find. \n (\n coderd_workspace_builds_total{status=\"failed\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} - \n coderd_workspace_builds_total{status=\"failed\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} offset $__interval\n ) >= 0) \n or coderd_workspace_builds_total{status=\"failed\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"}\n) > 0",
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Unsuccessful Builds by State",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 8,
+ "w": 4,
+ "x": 20,
+ "y": 19.2
+ },
+ "id": 34,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "Workspaces \"transition\" between `STOP`, `START`, and `DESTROY` states.\n\nWorkspaces transition between states when a \"build\" is initiated, which is an execution of `terraform` against the chosen template.\n\nUse the \"Build Log\" table to identify workspace owners who may be struggling with template builds, in order to proactively reach out to them with assistance.\n\nConsult the [Template documentation](https://coder.com/docs/v2/latest/templates) for more information.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "custom": {
+ "align": "auto",
+ "cellOptions": {
+ "type": "auto"
+ },
+ "filterable": true,
+ "inspect": false
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "status"
+ },
+ "properties": [
+ {
+ "id": "custom.cellOptions",
+ "value": {
+ "type": "color-text"
+ }
+ },
+ {
+ "id": "mappings",
+ "value": [
+ {
+ "options": {
+ "failed": {
+ "color": "orange",
+ "index": 1,
+ "text": "Failure"
+ },
+ "success": {
+ "color": "green",
+ "index": 0,
+ "text": "Success"
+ }
+ },
+ "type": "value"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Workspace Transition"
+ },
+ "properties": [
+ {
+ "id": "custom.cellOptions",
+ "value": {
+ "type": "color-text"
+ }
+ },
+ {
+ "id": "mappings",
+ "value": [
+ {
+ "options": {
+ "DESTROY": {
+ "color": "red",
+ "index": 0
+ },
+ "START": {
+ "color": "blue",
+ "index": 1
+ },
+ "STOP": {
+ "color": "purple",
+ "index": 2
+ }
+ },
+ "type": "value"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 10,
+ "w": 20,
+ "x": 0,
+ "y": 27.2
+ },
+ "id": 6,
+ "interval": "",
+ "options": {
+ "cellHeight": "sm",
+ "footer": {
+ "countRows": false,
+ "enablePagination": true,
+ "fields": [],
+ "reducer": [
+ "sum"
+ ],
+ "show": false
+ },
+ "showHeader": true,
+ "sortBy": [
+ {
+ "desc": true,
+ "displayName": "Time"
+ }
+ ]
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum by (workspace_name, workspace_owner, status, template_name, template_version, workspace_transition) (\n # Since new series are created and are initially set to a value of 1, we cannot use \"increase\" (because an increase from nothing to 1 does not yield 1).\n # So we compare the current series to an interval ago to see if we have any new series and then sum the series we find. \n ((\n coderd_workspace_builds_total{workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} - \n coderd_workspace_builds_total{workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} offset $__interval\n ) >= 0) \n or coderd_workspace_builds_total{workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"}\n) > 0",
+ "format": "table",
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Build Log",
+ "transformations": [
+ {
+ "disabled": true,
+ "id": "groupBy",
+ "options": {
+ "fields": {
+ "Count": {
+ "aggregations": [
+ "sum"
+ ],
+ "operation": "aggregate"
+ },
+ "Status": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "Template Name": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "Template Version": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "Total": {
+ "aggregations": [
+ "sum"
+ ],
+ "operation": "aggregate"
+ },
+ "Value": {
+ "aggregations": [
+ "sum"
+ ],
+ "operation": "aggregate"
+ },
+ "Workspace Name": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "Workspace Ownert": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "Workspace Transition": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "status": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "template_name": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "template_version": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "workspace_name": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "workspace_owner": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "workspace_transition": {
+ "aggregations": [],
+ "operation": "groupby"
+ }
+ }
+ }
+ },
+ {
+ "id": "sortBy",
+ "options": {
+ "fields": {},
+ "sort": [
+ {
+ "desc": true,
+ "field": "Value"
+ }
+ ]
+ }
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": false
+ },
+ "includeByName": {},
+ "indexByName": {},
+ "renameByName": {
+ "Value": "Count",
+ "Value (sum)": "Total",
+ "status": "Status",
+ "template_name": "Template Name",
+ "template_version": "Template Version",
+ "workspace_name": "Workspace Name",
+ "workspace_owner": "Workspace Owner",
+ "workspace_transition": "Workspace Transition"
+ }
+ }
+ }
+ ],
+ "type": "table"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 10,
+ "w": 4,
+ "x": 20,
+ "y": 27.2
+ },
+ "id": 29,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "This table shows a reverse-chronological log of all workspace builds.\n\nThe \"Count\" field shows the count of events which occurred within a minute, grouped by all columns.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ }
+ },
+ "mappings": [],
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 5,
+ "x": 0,
+ "y": 37.2
+ },
+ "id": 8,
+ "interval": "1h",
+ "options": {
+ "displayLabels": [
+ "name"
+ ],
+ "legend": {
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true,
+ "values": [
+ "percent"
+ ]
+ },
+ "pieType": "pie",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "count by (workspace_owner) (coderd_workspace_latest_build_status{template_name=~\"$template_name\"})",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Workspace by User",
+ "type": "piechart"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ }
+ },
+ "mappings": [],
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 5,
+ "x": 5,
+ "y": 37.2
+ },
+ "id": 9,
+ "interval": "1h",
+ "options": {
+ "displayLabels": [
+ "name"
+ ],
+ "legend": {
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true,
+ "values": [
+ "percent"
+ ]
+ },
+ "pieType": "pie",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "count by (workspace_owner, template_name) (coderd_workspace_latest_build_status{template_name=~\"$template_name\"})",
+ "instant": true,
+ "legendFormat": {{ printf "{{workspace_owner}}:{{template_name}}" | quote }},
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Workspace by User/Template",
+ "type": "piechart"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ }
+ },
+ "mappings": [],
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 5,
+ "x": 10,
+ "y": 37.2
+ },
+ "id": 4,
+ "interval": "1h",
+ "options": {
+ "displayLabels": [
+ "name"
+ ],
+ "legend": {
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true,
+ "values": [
+ "percent"
+ ]
+ },
+ "pieType": "pie",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "count by (template_name) (coderd_workspace_latest_build_status{template_name=~\"$template_name\"})",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Template Usage",
+ "type": "piechart"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ }
+ },
+ "mappings": [],
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 5,
+ "x": 15,
+ "y": 37.2
+ },
+ "id": 5,
+ "interval": "1h",
+ "options": {
+ "displayLabels": [],
+ "legend": {
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true,
+ "values": [
+ "percent"
+ ]
+ },
+ "pieType": "pie",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "count by (template_name, template_version) (coderd_workspace_latest_build_status{template_name=~\"$template_name\"})",
+ "instant": true,
+ "legendFormat": {{ printf "{{template_name}}:{{template_version}}" | quote }},
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Template Version Usage",
+ "type": "piechart"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 20,
+ "y": 37.2
+ },
+ "id": 24,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "These charts show the distribution of workspaces and templates.\n\nUse these charts to identify which users have outdated templates, and which templates are the most/least popular in your organisation.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 44.2
+ },
+ "id": 32,
+ "panels": [],
+ "title": "Logs",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "type": "loki",
+ "uid": "loki"
+ },
+ "gridPos": {
+ "h": 10,
+ "w": 20,
+ "x": 0,
+ "y": 45.2
+ },
+ "id": 7,
+ "options": {
+ "dedupStrategy": "exact",
+ "enableLogDetails": true,
+ "prettifyLogMessage": false,
+ "showCommonLabels": false,
+ "showLabels": false,
+ "showTime": false,
+ "sortOrder": "Descending",
+ "wrapLogMessage": true
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "loki",
+ "uid": "loki"
+ },
+ "editorMode": "code",
+ "expr": "{ {{- include "non-workspace-selector" . -}}, logger=~\"(.*runner|terraform|provisioner.*)\"} |~ \"$workspace_name\" or \"$template_name\"",
+ "queryType": "range",
+ "refId": "A"
+ }
+ ],
+ "title": "Logs",
+ "type": "logs"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 10,
+ "w": 4,
+ "x": 20,
+ "y": 45.2
+ },
+ "id": 22,
+ "links": [
+ {
+ "title": "Provisioners Dashboard",
+ "url": "/d/provisionerd/provisioners?${__url_time_range}"
+ }
+ ],
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "These are the logs produced by the [Provisioners](/d/provisionerd/provisioners?${__url_time_range}).\n\nUse the dropdowns at the top to filter the logs down to a specific workspace and/or template.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ }
+ ],
+ "refresh": "{{- include "dashboard-refresh" . -}}",
+ "schemaVersion": 39,
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "allValue": "",
+ "current": {
+ "selected": true,
+ "text": [
+ "All"
+ ],
+ "value": [
+ "$__all"
+ ]
+ },
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "definition": "label_values(coderd_workspace_builds_total,workspace_name)",
+ "hide": 0,
+ "includeAll": true,
+ "label": "Workspace Name Filter",
+ "multi": true,
+ "name": "workspace_name",
+ "options": [],
+ "query": {
+ "qryType": 1,
+ "query": "label_values(coderd_workspace_builds_total,workspace_name)",
+ "refId": "PrometheusVariableQueryEditor-VariableQuery"
+ },
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "type": "query"
+ },
+ {
+ "allValue": "",
+ "current": {
+ "selected": true,
+ "text": [
+ "All"
+ ],
+ "value": [
+ "$__all"
+ ]
+ },
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "definition": "label_values(coderd_workspace_builds_total,template_name)",
+ "hide": 0,
+ "includeAll": true,
+ "label": "Template Name Filter",
+ "multi": true,
+ "name": "template_name",
+ "options": [],
+ "query": {
+ "qryType": 1,
+ "query": "label_values(coderd_workspace_builds_total,template_name)",
+ "refId": "PrometheusVariableQueryEditor-VariableQuery"
+ },
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "type": "query"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-{{- include "dashboard-range" . -}}",
+ "to": "now"
+ },
+ "timepicker": {},
+ "timezone": "browser",
+ "title": "Workspaces",
+ "uid": "workspaces",
+ "version": 2,
+ "weekStart": ""
+}
+{{ end }}
\ No newline at end of file
diff --git a/coder-observability/templates/dashboards/configmap-dashboards-coderd.yaml b/coder-observability/templates/dashboards/configmap-dashboards-coderd.yaml
new file mode 100644
index 0000000..33719f5
--- /dev/null
+++ b/coder-observability/templates/dashboards/configmap-dashboards-coderd.yaml
@@ -0,0 +1,10 @@
+---
+# ConfigMap holding the "coderd" dashboard JSON, rendered from the
+# "coderd-dashboard.json" named template and indented into the block scalar.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: dashboards-coderd
+  namespace: {{ .Release.Namespace | quote }}
+data:
+  coderd.json: |- {{- include "coderd-dashboard.json" . | trim | nindent 4 }}
diff --git a/coder-observability/templates/dashboards/configmap-dashboards-prebuilds.yaml b/coder-observability/templates/dashboards/configmap-dashboards-prebuilds.yaml
new file mode 100644
index 0000000..14d5908
--- /dev/null
+++ b/coder-observability/templates/dashboards/configmap-dashboards-prebuilds.yaml
@@ -0,0 +1,10 @@
+---
+# ConfigMap holding the "prebuilds" dashboard JSON, rendered from the
+# "prebuilds-dashboard.json" named template and indented into the block scalar.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: dashboards-prebuilds
+  namespace: {{ .Release.Namespace | quote }}
+data:
+  prebuilds.json: |- {{- include "prebuilds-dashboard.json" . | trim | nindent 4 }}
diff --git a/coder-observability/templates/dashboards/configmap-dashboards-provisionerd.yaml b/coder-observability/templates/dashboards/configmap-dashboards-provisionerd.yaml
new file mode 100644
index 0000000..0c20e83
--- /dev/null
+++ b/coder-observability/templates/dashboards/configmap-dashboards-provisionerd.yaml
@@ -0,0 +1,10 @@
+---
+# ConfigMap holding the "provisionerd" dashboard JSON, rendered from the
+# "provisionerd-dashboard.json" named template and indented into the block scalar.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: dashboards-provisionerd
+  namespace: {{ .Release.Namespace | quote }}
+data:
+  provisionerd.json: |- {{- include "provisionerd-dashboard.json" . | trim | nindent 4 }}
diff --git a/coder-observability/templates/dashboards/configmap-dashboards-status.yaml b/coder-observability/templates/dashboards/configmap-dashboards-status.yaml
new file mode 100644
index 0000000..e307cc5
--- /dev/null
+++ b/coder-observability/templates/dashboards/configmap-dashboards-status.yaml
@@ -0,0 +1,10 @@
+---
+# ConfigMap holding the "status" dashboard JSON, rendered from the
+# "status-dashboard.json" named template and indented into the block scalar.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: dashboards-status
+  namespace: {{ .Release.Namespace | quote }}
+data:
+  status.json: |- {{- include "status-dashboard.json" . | trim | nindent 4 }}
diff --git a/coder-observability/templates/dashboards/configmap-dashboards-workspace_detail.yaml b/coder-observability/templates/dashboards/configmap-dashboards-workspace_detail.yaml
new file mode 100644
index 0000000..084c5e1
--- /dev/null
+++ b/coder-observability/templates/dashboards/configmap-dashboards-workspace_detail.yaml
@@ -0,0 +1,10 @@
+---
+# ConfigMap holding the workspace-detail dashboard JSON, rendered from the
+# "workspace-detail-dashboard.json" named template and indented into the block scalar.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: dashboards-workspace-detail
+  namespace: {{ .Release.Namespace | quote }}
+data:
+  workspaces-detail.json: |- {{- include "workspace-detail-dashboard.json" . | trim | nindent 4 }}
diff --git a/coder-observability/templates/dashboards/configmap-dashboards-workspaces.yaml b/coder-observability/templates/dashboards/configmap-dashboards-workspaces.yaml
new file mode 100644
index 0000000..bae657d
--- /dev/null
+++ b/coder-observability/templates/dashboards/configmap-dashboards-workspaces.yaml
@@ -0,0 +1,10 @@
+---
+# ConfigMap holding the "workspaces" dashboard JSON, rendered from the
+# "workspaces-dashboard.json" named template and indented into the block scalar.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: dashboards-workspaces
+  namespace: {{ .Release.Namespace | quote }}
+data:
+  workspaces.json: |- {{- include "workspaces-dashboard.json" . | trim | nindent 4 }}
diff --git a/coder-observability/templates/service-runbook-viewer.yaml b/coder-observability/templates/service-runbook-viewer.yaml
new file mode 100644
index 0000000..68c210a
--- /dev/null
+++ b/coder-observability/templates/service-runbook-viewer.yaml
@@ -0,0 +1,16 @@
+---
+# Service for the runbook-viewer pods: accepts TCP on port 80 and forwards
+# to target port 3000 on pods labelled app=runbook-viewer.
+apiVersion: v1
+kind: Service
+metadata:
+  name: runbook-viewer
+  # Set explicitly to the release namespace, as the StatefulSet of the same name does.
+  namespace: {{ .Release.Namespace | quote }}
+spec:
+  ports:
+    - port: 80
+      targetPort: 3000
+      protocol: TCP
+  selector:
+    app: runbook-viewer
diff --git a/coder-observability/templates/statefulset-postgres-exporter.yaml b/coder-observability/templates/statefulset-postgres-exporter.yaml
new file mode 100644
index 0000000..a1f6e55
--- /dev/null
+++ b/coder-observability/templates/statefulset-postgres-exporter.yaml
@@ -0,0 +1,44 @@
+---
+# StatefulSet running a single postgres-exporter replica; it declares container
+# port 9187 (named "exporter") and is annotated for Prometheus scraping.
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: postgres-exporter
+  namespace: {{ .Release.Namespace | quote }}
+spec:
+  selector:
+    matchLabels:
+      app: postgres-exporter
+  serviceName: postgres-exporter
+  replicas: 1
+  template:
+    metadata:
+      annotations:
+        prometheus.io/scrape: 'true'
+      labels:
+        app: postgres-exporter
+        app.kubernetes.io/name: "database-stats"
+    spec:
+      containers:
+        - name: postgres-exporter
+          image: {{ .Values.global.postgres.exporter.image | quote }}
+          args:
+            - --collector.long_running_transactions
+          ports:
+            - containerPort: 9187
+              name: exporter
+          env:
+            - name: DATA_SOURCE_NAME
+              # quote escapes whatever the helper renders, so a character such as a
+              # single quote in the connection string cannot end the scalar early.
+              value: {{ include "postgres-connector-string" . | quote }}
+          {{ include "postgres-secret-mount" . | nindent 10 }}
+          {{- if .Values.global.postgres.volumeMounts }}
+          volumeMounts:
+          {{ toYaml .Values.global.postgres.volumeMounts | nindent 12 }}
+          {{- end }}
+      {{- if .Values.global.postgres.volumes }}
+      volumes:
+      {{ toYaml .Values.global.postgres.volumes | nindent 8 }}
+      {{- end }}
diff --git a/coder-observability/templates/statefulset-runbook-viewer.yaml b/coder-observability/templates/statefulset-runbook-viewer.yaml
new file mode 100644
index 0000000..64f50e4
--- /dev/null
+++ b/coder-observability/templates/statefulset-runbook-viewer.yaml
@@ -0,0 +1,38 @@
+---
+# StatefulSet running a single runbook-viewer replica; the "runbooks"
+# ConfigMap is mounted at /docs/ and container port 3000 is declared.
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: runbook-viewer
+  namespace: {{ .Release.Namespace | quote }}
+spec:
+  selector:
+    matchLabels:
+      app: runbook-viewer
+  serviceName: runbook-viewer
+  replicas: 1
+  template:
+    metadata:
+      annotations:
+        # Hash of the chart's runbooks/ files, so the pod template changes
+        # whenever a runbook changes. Quoted so the digest is always a string.
+        checksum/config: {{ (.Files.Glob "runbooks/**").AsConfig | indent 2 | sha256sum | quote }}
+      labels:
+        app: runbook-viewer
+    spec:
+      containers:
+        - name: madness
+          image: {{ .Values.runbookViewer.image | quote }}
+          ports:
+            - containerPort: 3000
+              name: madness
+          args:
+            - server
+          volumeMounts:
+            - mountPath: /docs/
+              name: runbooks
+      volumes:
+        - name: runbooks
+          configMap:
+            name: runbooks
diff --git a/coder-observability/templates/statefulset-sql-exporter.yaml b/coder-observability/templates/statefulset-sql-exporter.yaml
new file mode 100644
index 0000000..628339e
--- /dev/null
+++ b/coder-observability/templates/statefulset-sql-exporter.yaml
@@ -0,0 +1,41 @@
+---
+# StatefulSet running a single sql-exporter replica, started with
+# -config.file=/cfg/config.yaml from the "sql-exporter-config" ConfigMap.
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: sql-exporter
+  namespace: {{ .Release.Namespace | quote }}
+spec:
+  selector:
+    matchLabels:
+      app: sql-exporter
+  serviceName: sql-exporter
+  replicas: 1
+  template:
+    metadata:
+      annotations:
+        prometheus.io/scrape: 'true'
+        # Hash of the rendered configmap-sql-exporter.yaml template, so the pod
+        # template changes whenever it changes. Quoted so it is always a string.
+        checksum/config: {{ include (print $.Template.BasePath "/configmap-sql-exporter.yaml") . | sha256sum | quote }}
+      labels:
+        app: sql-exporter
+        app.kubernetes.io/name: "database-stats"
+    spec:
+      containers:
+        - name: sql-exporter
+          image: {{ .Values.sqlExporter.image | quote }}
+          args:
+            - -config.file=/cfg/config.yaml
+          ports:
+            - containerPort: 9399
+              name: exporter
+          volumeMounts:
+            - mountPath: /cfg/
+              name: config
+          {{ include "postgres-secret-mount" . | nindent 10 }}
+      volumes:
+        - name: config
+          configMap:
+            name: sql-exporter-config
diff --git a/coder-observability/values.yaml b/coder-observability/values.yaml
index ad8c562..6d06981 100644
--- a/coder-observability/values.yaml
+++ b/coder-observability/values.yaml
@@ -1,11 +1,179 @@
-fullnameOverride: null
global:
+ coder:
+ # global.coder.scrapeMetrics -- use this to scrape metrics from a standalone (set of) coder deployment(s)
+ # if using kubernetes, rather add an annotation "prometheus.io/scrape=true" and coder will get automatically scraped;
+ # set this value to null and configure coderdSelector to target your coder pods
+ scrapeMetrics: null
+# hostname: localhost
+# port: 2112
+# scrapeInterval: 15s
+# additionalLabels:
+# job: coder
+ # global.coder.coderdSelector -- series selector for Prometheus/Loki to locate coderd pods.
+ # ensure this uses backticks for quotes!
+ coderdSelector: 'pod=~`coder.*`, pod!~`.*provisioner.*`'
+ # global.coder.provisionerdSelector -- series selector for Prometheus/Loki to locate provisioner pods.
+ # https://coder.com/docs/v2/latest/admin/provisioners
+ # TODO: rename container label in provisioner helm chart to be "provisioner" not "coder"
+ # ensure this uses backticks for quotes!
+ provisionerdSelector: 'pod=~`coder-provisioner.*`'
+ # global.coder.workspacesSelector -- series selector for Prometheus/Loki to locate workspace pods.
+ workspacesSelector: 'namespace=`coder-workspaces`'
+ # global.coder.controlPlaneNamespace -- the namespace into which the control plane has been deployed.
+ controlPlaneNamespace: coder
+ # global.coder.externalProvisionersNamespace -- the namespace into which any external provisioners have been deployed.
+ externalProvisionersNamespace: coder
+ # See https://coder.com/docs/v2/latest/cli/server#--log-human
+ # "Human" format is the default, which is a combination of plaintext and logfmt, but it's quite tricky to parse reliably
+ # with regex matchers.
+ # TODO: support "json" format
+ logFormat: human
+ # global.coder.alerts -- alerts for the various aspects of Coder
+ alerts:
+ enterprise:
+ groups:
+ Licences:
+ enabled: true
+ delay: 1m
+ thresholds:
+ warning: 0.9
+ critical: 1
+ coderd:
+ groups:
+ CPU:
+ enabled: true
+ delay: 10m
+ period: 10m
+ thresholds:
+ warning: 0.8
+ critical: 0.9
+ Memory:
+ enabled: true
+ delay: 10m
+ thresholds:
+ warning: 0.8
+ critical: 0.9
+ Restarts:
+ enabled: true
+ delay: 1m
+ period: 10m
+ thresholds:
+ notify: 1
+ warning: 2
+ critical: 3
+ Replicas:
+ enabled: true
+ delay: 5m
+ thresholds:
+ notify: 3 # 2/3 replicas are alive
+ warning: 2 # 1/3 replicas are alive
+ critical: 1 # 0/3 replicas are alive
+ WorkspaceBuildFailures:
+ enabled: true
+ delay: 10m
+ period: 10m
+ thresholds:
+ notify: 2
+ warning: 5
+ critical: 10
+ IneligiblePrebuilds:
+ enabled: true
+ delay: 10m
+ thresholds:
+ notify: 1
+ UnprovisionedPrebuiltWorkspaces:
+ enabled: true
+ delay: 10m
+ thresholds:
+ warn: 1
+ provisionerd:
+ groups:
+ Replicas:
+ enabled: true
+ delay: 5m
+ thresholds:
+ notify: 3 # 2/3 replicas are alive
+ warning: 2 # 1/3 replicas are alive
+ critical: 1 # 0/3 replicas are alive
+
zone: svc
- metricsName: metrics
-collector:
+ externalScheme: http
+ # The external hostname from which k8s services can be accessed in the form of:
+ # <externalScheme>://<service>.<namespace>.<externalZone>
+ # e.g.
+ # http://dashboards.coder-observability.svc.cluster.local
+ externalZone: svc.cluster.local
+
+ # global.telemetry -- control telemetry collection
+ telemetry:
+ # global.telemetry.metrics -- control metric collection
+ metrics:
+ # global.telemetry.metrics.scrape_interval -- how often the collector will scrape discovered pods
+ scrape_interval: 15s
+ # global.telemetry.metrics.scrape_timeout -- how long a request will be allowed to wait before being canceled
+ scrape_timeout: 12s
+
+ # global.postgres -- postgres connection information
+ # NOTE: these settings are global so we can parameterise some values which get rendered by subcharts
+ postgres:
+ hostname: localhost
+ port: 5432
+ username: coder
+ password:
+ database: coder
+ sslmode: disable
+ # SSL root certificate path - only required when sslmode != "disable"
+ sslrootcert:
+
+ # ensure that your secret has a field named `PGPASSWORD`
+ mountSecret: "secret-postgres"
+ exporter:
+ image: "quay.io/prometheuscommunity/postgres-exporter"
+
+ # volumes and volumeMounts for SSL certificates
+ volumes: []
+ volumeMounts: []
+
+ # global.postgres.alerts -- alerts for postgres
+ alerts:
+ groups:
+ Basic:
+ enabled: true
+ delay: 1m
+ Notifications:
+ enabled: true
+ delay: 15m
+ thresholds:
+ notify: 0.5
+ warning: 0.8
+ critical: 0.9
+ Connections:
+ enabled: true
+ delay: 5m
+ thresholds:
+ notify: 0.5
+ warning: 0.8
+ critical: 0.9
+
+ # global.dashboards -- settings for bundled dashboards
+ dashboards:
+ # global.dashboards.timerange -- how far back dashboards should look
+ timerange: 12h
+ # global.dashboards.refresh -- how often dashboards should refresh
+ refresh: 30s
+ # global.dashboards.queryTimeout -- how long (in seconds) a Grafana query may run before it times out
+ queryTimeout: 900
+
+runbookViewer:
+ image: "dannyben/madness"
+
+sqlExporter:
+ image: "burningalchemist/sql_exporter"
+
+grafana-agent:
enabled: true
- fullnameOverride: collector
+ fullnameOverride: grafana-agent
agent:
mode: flow
configMap:
@@ -26,9 +194,98 @@ collector:
crds:
create: false
-dashboards:
+ withOTLPReceiver: false
+
+ # Configuration blocks
+ #
+ # Enable debug logging (warning: produces large amount of logs!)
+ #logging: |-
+ # logging {
+ # level = "debug"
+ # format = "logfmt"
+ # }
+ discovery: |-
+ // Discover k8s nodes
+ discovery.kubernetes "nodes" {
+ role = "node"
+ }
+
+ // Discover k8s pods
+ discovery.kubernetes "pods" {
+ role = "pod"
+ selectors {
+ role = "pod"
+ }
+ }
+ commonRelabellings: |-
+ rule {
+ source_labels = ["__meta_kubernetes_namespace"]
+ target_label = "namespace"
+ }
+ rule {
+ source_labels = ["__meta_kubernetes_pod_name"]
+ target_label = "pod"
+ }
+ // coalesce the following labels and pick the first value; we'll use this to define the "job" label
+ rule {
+ source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_component", "app", "__meta_kubernetes_pod_container_name"]
+ separator = "/"
+ target_label = "__meta_app"
+ action = "replace"
+ regex = "^/*([^/]+?)(?:/.*)?$" // split by the delimiter if it exists, we only want the first one
+ replacement = "${1}"
+ }
+ rule {
+ source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_app"]
+ separator = "/"
+ target_label = "job"
+ }
+ rule {
+ source_labels = ["__meta_kubernetes_pod_container_name"]
+ target_label = "container"
+ }
+ rule {
+ regex = "__meta_kubernetes_pod_label_(statefulset_kubernetes_io_pod_name|controller_revision_hash)"
+ action = "labeldrop"
+ }
+ rule {
+ regex = "pod_template_generation"
+ action = "labeldrop"
+ }
+ rule {
+ source_labels = ["__meta_kubernetes_pod_phase"]
+ regex = "Pending|Succeeded|Failed|Completed"
+ action = "drop"
+ }
+ rule {
+ source_labels = ["__meta_kubernetes_pod_node_name"]
+ action = "replace"
+ target_label = "node"
+ }
+ rule {
+ action = "labelmap"
+ regex = "__meta_kubernetes_pod_annotation_prometheus_io_param_(.+)"
+ replacement = "__param_$1"
+ }
+ extraBlocks: ""
+ # Examples:
+ # loki.source.file "tmpfiles" {
+ # targets = [
+ # {__path__ = "/tmp/foo.txt", "color" = "pink"},
+ # {__path__ = "/tmp/bar.txt", "color" = "blue"},
+ # {__path__ = "/tmp/baz.txt", "color" = "grey"},
+ # ]
+ # forward_to = [loki.write.loki.receiver]
+ # }
+ podMetricsRelabelRules: ""
+ podLogsRelabelRules: ""
+
+grafana:
enabled: true
- fullnameOverride: dashboards
+ image:
+ tag: 10.4.19
+ fullnameOverride: grafana
+ useStatefulSet: true
replicas: 1
deploymentStrategy:
type: Recreate # avoid MultiAttachError for standard-rwo sc
@@ -40,51 +297,161 @@ dashboards:
testFramework:
enabled: false
annotations:
- prometheus.io/scrape: "true" # TODO: this adds annotations to _all_ resources; can we be more specific?
+ # TODO: this adds annotations to _all_ resources; can we be more specific?
+ prometheus.io/scrape: "true"
+ dashboardProviders:
+ infra.yaml:
+ apiVersion: 1
+ providers:
+ - name: infra
+ orgId: 1
+ folder: 'Infrastructure'
+ type: file
+ disableDeletion: false
+ editable: false
+ options:
+ path: /var/lib/grafana/dashboards/infra
+ coder.yaml:
+ apiVersion: 1
+ providers:
+ - name: coder
+ orgId: 1
+ folder: 'Coder'
+ type: file
+ updateIntervalSeconds: 5
+ disableDeletion: false
+ editable: false
+ options:
+ path: /var/lib/grafana/dashboards/coder
+ sidecar.yaml:
+ apiVersion: 1
+ providers:
+ - name: sidecar
+ orgId: 1
+ type: file
+ folder: 'Other'
+ disableDeletion: false
+ updateIntervalSeconds: 30
+ editable: false
+ options:
+ path: /tmp/dashboards
+ dashboards:
+ # TODO: import dashboards from coder/coder
+ infra:
+ node-exporter-full:
+ gnetId: 1860
+ revision: 36
+ datasource: metrics
+ postgres-database:
+ gnetId: 9628
+ revision: 7
+ datasource: metrics
datasources:
datasources.yaml:
apiVersion: 1
datasources:
- name: metrics
type: prometheus
- url: http://metrics.{{ .Release.Namespace }}.{{ $.Values.global.zone }}
+ url: http://prometheus.{{ .Release.Namespace }}.{{ $.Values.global.zone }}
access: proxy
isDefault: true
editable: false
+ # add 5s on global timeout to distinguish between Grafana timeout & datasource timeout
+ timeout: '{{ add $.Values.global.dashboards.queryTimeout 5 }}'
+ uid: prometheus
- name: logs
type: loki
- url: http://logs-gateway.{{ .Release.Namespace }}.{{ $.Values.global.zone }}
+ url: http://loki-gateway.{{ .Release.Namespace }}.{{ $.Values.global.zone }}
access: proxy
isDefault: false
editable: false
+ # add 5s on global timeout to distinguish between Grafana timeout & datasource timeout
+ timeout: '{{ add $.Values.global.dashboards.queryTimeout 5 }}'
+ uid: loki
+ - name: postgres
+ type: postgres
+ url: '{{ .Values.global.postgres.hostname }}:{{ .Values.global.postgres.port }}'
+ user: '{{ .Values.global.postgres.username }}'
+ secureJsonData:
+ password: '{{ if .Values.global.postgres.password }}{{ .Values.global.postgres.password }}{{ else }}$PGPASSWORD{{ end }}'
+ jsonData:
+ sslmode: '{{ .Values.global.postgres.sslmode }}'
+ isDefault: false
+ editable: false
+ # add 5s on global timeout to distinguish between Grafana timeout & datasource timeout
+ timeout: '{{ add $.Values.global.dashboards.queryTimeout 5 }}'
+ uid: postgres
admin:
- existingSecret: grafana-admin
- userKey: username
- passwordKey: password
+ existingSecret: ""
+ env:
+ GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION: "true"  # env values must be strings
grafana.ini:
+ auth.anonymous:
+ enabled: true
+ org_name: Main Org.
+ org_role: Admin
analytics:
reporting_enabled: false
users:
allow_sign_up: false
+ feature_toggles:
+ # migrate Angular panels to React
+ # see https://grafana.com/docs/grafana/latest/developers/angular_deprecation/angular-plugins/#automatic-migration-of-plugins
+ autoMigrateOldPanels: true
+ dashboards:
+ # mounted configmap will be synced with sidecar
+ default_home_dashboard_path: /var/lib/grafana/dashboards/coder/0/status.json
+ dataproxy:
+ timeout: '{{ $.Values.global.dashboards.queryTimeout }}'
sidecar:
dashboards:
provider:
disableDelete: true
allowUiUpdates: true
- enabled: true
+ enabled: false
labelValue: "1"
+ extraConfigmapMounts:
+ # we can't combine configmaps because of the 1MiB size limit, but Grafana will scan
+ # the /var/lib/grafana/dashboards/coder directory deeply to find dashboards
+ - name: dashboards-status
+ mountPath: /var/lib/grafana/dashboards/coder/0
+ configMap: dashboards-status
+ readOnly: false
+ - name: dashboards-coderd
+ mountPath: /var/lib/grafana/dashboards/coder/1
+ configMap: dashboards-coderd
+ readOnly: false
+ - name: dashboards-provisionerd
+ mountPath: /var/lib/grafana/dashboards/coder/2
+ configMap: dashboards-provisionerd
+ readOnly: false
+ - name: dashboards-workspaces
+ mountPath: /var/lib/grafana/dashboards/coder/3
+ configMap: dashboards-workspaces
+ readOnly: false
+ - name: dashboards-workspace-detail
+ mountPath: /var/lib/grafana/dashboards/coder/4
+ configMap: dashboards-workspace-detail
+ readOnly: false
+ - name: dashboards-prebuilds
+ mountPath: /var/lib/grafana/dashboards/coder/5
+ configMap: dashboards-prebuilds
+ readOnly: false
-metrics:
+prometheus:
enabled: true
server:
- fullnameOverride: metrics
+ fullnameOverride: prometheus
podAnnotations:
prometheus.io/scrape: "true"
global:
- scrape_interval: 15s
+ # prometheus.server.evaluation_interval -- how often to evaluate recording & alerting rule groups
+ evaluation_interval: 30s
+
extraArgs:
log.level: debug
+
replicaCount: 1
statefulSet:
enabled: true
@@ -99,10 +466,36 @@ metrics:
extraFlags:
- web.enable-lifecycle
- enable-feature=remote-write-receiver
+ extraConfigmapMounts:
+ - name: alerts
+ mountPath: /etc/config/alerts
+ configMap: metrics-alerts
+ readOnly: true  # camelCase: the chart reads .readOnly; "readonly" is ignored
+
+ serverFiles:
+ prometheus.yml:
+ # disables scraping of metrics by the Prometheus helm chart since this is managed by the collector
+ scrape_configs: []
+ # use custom rule files to be able to render templates (can't do that in values.yaml, unless that value is evaluated by a tpl call)
+ rule_files:
+ - /etc/config/alerts/*.yaml
testFramework:
enabled: false
+ # enable metric collection from configmap reloader
+ configmapReload:
+ prometheus:
+ extraArgs:
+ log-level: all
+ watch-interval: 15s
+ containerPort: 9091
+ extraConfigmapMounts:
+ - name: alerts
+ mountPath: /etc/config/alerts
+ configMap: metrics-alerts
+ readOnly: true  # camelCase: the chart reads .readOnly; "readonly" is ignored
+
alertmanager:
fullnameOverride: alertmanager
enabled: true
@@ -125,15 +518,10 @@ metrics:
prometheus-pushgateway:
enabled: false
- # disables scraping of metrics by the Prometheus helm chart since this is managed by the collector
- serverFiles:
- prometheus.yml:
- scrape_configs:
-
-logs:
+loki:
enabled: true
- nameOverride: logs
- fullnameOverride: logs
+ nameOverride: loki
+ fullnameOverride: loki
enterprise:
enabled: false
@@ -142,20 +530,20 @@ logs:
useExternalLicense: false
test:
- canaryServiceAddress: "http://logs-canary:3500/metrics"
+ canaryServiceAddress: "http://loki-canary:3500/metrics"
enabled: true
minio:
enabled: true
- fullnameOverride: logs-storage
- address: logs-storage.{{ .Release.Namespace }}.{{ .Values.global.zone}}:9000
+ fullnameOverride: loki-storage
+ address: loki-storage.{{ .Release.Namespace }}.{{ .Values.global.zone}}:9000
podAnnotations:
prometheus.io/scrape: "true"
prometheus.io/path: "/minio/v2/metrics/cluster"
+ podLabels:
+ app.kubernetes.io/name: "loki-storage"
loki:
- podAnnotations:
- prometheus.io/scrape: "true"
auth_enabled: false
commonConfig:
path_prefix: /var/loki
@@ -176,7 +564,7 @@ logs:
clients:
# "fake" is the default username when auth is disabled (unfortunate, I know)
fake:
- url: http://metrics.{{ .Release.Namespace }}.{{ .Values.global.zone}}/api/v1/write
+ url: http://prometheus.{{ .Release.Namespace }}.{{ .Values.global.zone}}/api/v1/write
headers:
Source: Loki
remote_timeout: 30s
@@ -222,12 +610,18 @@ logs:
gateway:
replicas: 1
write:
+ podAnnotations:
+ prometheus.io/scrape: "true"
replicas: 1
extraArgs:
- -log.level=debug
read:
+ podAnnotations:
+ prometheus.io/scrape: "true"
replicas: 1
backend:
+ podAnnotations:
+ prometheus.io/scrape: "true"
replicas: 1
extraVolumes:
- name: ruler-wal
@@ -236,4 +630,4 @@ logs:
- name: ruler-wal
mountPath: /var/loki-ruler-wal
extraArgs:
- - -log.level=debug
\ No newline at end of file
+ - -log.level=debug
diff --git a/compiled/resources.yaml b/compiled/resources.yaml
new file mode 100644
index 0000000..aff5679
--- /dev/null
+++ b/compiled/resources.yaml
@@ -0,0 +1,12418 @@
+---
+# Source: coder-observability/charts/loki/templates/chunks-cache/poddisruptionbudget-chunks-cache.yaml
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+ name: loki-memcached-chunks-cache
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: memcached-chunks-cache
+spec:
+ selector:
+ matchLabels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: memcached-chunks-cache
+ maxUnavailable: 1
+---
+# Source: coder-observability/charts/loki/templates/results-cache/poddisruptionbudget-results-cache.yaml
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+ name: loki-memcached-results-cache
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: memcached-results-cache
+spec:
+ selector:
+ matchLabels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: memcached-results-cache
+ maxUnavailable: 1
+---
+# Source: coder-observability/charts/grafana-agent/templates/serviceaccount.yaml
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+ name: grafana-agent
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: grafana-agent
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+---
+# Source: coder-observability/charts/grafana/templates/serviceaccount.yaml
+apiVersion: v1
+kind: ServiceAccount
+automountServiceAccountToken: false
+metadata:
+ labels:
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ name: grafana
+ namespace: coder-observability
+---
+# Source: coder-observability/charts/loki/charts/minio/templates/serviceaccount.yaml
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+ name: "minio-sa"
+ namespace: "coder-observability"
+---
+# Source: coder-observability/charts/loki/templates/loki-canary/serviceaccount.yaml
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+ name: loki-canary
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: canary
+ annotations:
+ prometheus.io/scrape: "true"
+automountServiceAccountToken: true
+---
+# Source: coder-observability/charts/loki/templates/serviceaccount.yaml
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+ name: loki
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+automountServiceAccountToken: true
+---
+# Source: coder-observability/charts/prometheus/charts/alertmanager/templates/serviceaccount.yaml
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+ name: alertmanager
+ labels:
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ namespace: coder-observability
+automountServiceAccountToken: true
+---
+# Source: coder-observability/charts/prometheus/charts/kube-state-metrics/templates/serviceaccount.yaml
+apiVersion: v1
+kind: ServiceAccount
+automountServiceAccountToken: true
+metadata:
+ labels:
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: metrics
+ app.kubernetes.io/part-of: kube-state-metrics
+ app.kubernetes.io/name: kube-state-metrics
+ app.kubernetes.io/instance: coder-observability
+ name: kube-state-metrics
+ namespace: coder-observability
+---
+# Source: coder-observability/charts/prometheus/charts/prometheus-node-exporter/templates/serviceaccount.yaml
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+ name: node-exporter
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: metrics
+ app.kubernetes.io/part-of: prometheus-node-exporter
+ app.kubernetes.io/name: prometheus-node-exporter
+ app.kubernetes.io/instance: coder-observability
+automountServiceAccountToken: false
+---
+# Source: coder-observability/charts/prometheus/templates/serviceaccount.yaml
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+ labels:
+ app.kubernetes.io/component: server
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/part-of: prometheus
+ name: prometheus
+ namespace: coder-observability
+ annotations: {}
+---
+# Source: coder-observability/charts/loki/charts/minio/templates/secrets.yaml
+apiVersion: v1
+kind: Secret
+metadata:
+ name: loki-storage
+ namespace: "coder-observability"
+ labels:
+ app: minio
+ chart: minio-4.0.15
+ release: coder-observability
+ heritage: Helm
+type: Opaque
+data:
+ rootUser: "ZW50ZXJwcmlzZS1sb2dz"
+ rootPassword: "c3VwZXJzZWNyZXQ="
+---
+# Source: coder-observability/charts/grafana/templates/configmap.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: grafana
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ annotations:
+ prometheus.io/scrape: "true"
+data:
+ grafana.ini: |
+ [analytics]
+ check_for_updates = true
+ reporting_enabled = false
+ [auth.anonymous]
+ enabled = true
+ org_name = Main Org.
+ org_role = Admin
+ [dashboards]
+ default_home_dashboard_path = /var/lib/grafana/dashboards/coder/0/status.json
+ [dataproxy]
+ timeout = 900
+ [feature_toggles]
+ autoMigrateOldPanels = true
+ [grafana_net]
+ url = https://grafana.net
+ [log]
+ mode = console
+ [paths]
+ data = /var/lib/grafana/
+ logs = /var/log/grafana
+ plugins = /var/lib/grafana/plugins
+ provisioning = /etc/grafana/provisioning
+ [server]
+ domain = ''
+ [users]
+ allow_sign_up = false
+ datasources.yaml: |
+ apiVersion: 1
+ datasources:
+ - access: proxy
+ editable: false
+ isDefault: true
+ name: metrics
+ timeout: '905'
+ type: prometheus
+ uid: prometheus
+ url: http://prometheus.coder-observability.svc
+ - access: proxy
+ editable: false
+ isDefault: false
+ name: logs
+ timeout: '905'
+ type: loki
+ uid: loki
+ url: http://loki-gateway.coder-observability.svc
+ - editable: false
+ isDefault: false
+ jsonData:
+ sslmode: 'disable'
+ name: postgres
+ secureJsonData:
+ password: '$PGPASSWORD'
+ timeout: '905'
+ type: postgres
+ uid: postgres
+ url: 'localhost:5432'
+ user: 'coder'
+ coder.yaml: |
+ apiVersion: 1
+ providers:
+ - disableDeletion: false
+ editable: false
+ folder: Coder
+ name: coder
+ options:
+ path: /var/lib/grafana/dashboards/coder
+ orgId: 1
+ type: file
+ updateIntervalSeconds: 5
+ infra.yaml: |
+ apiVersion: 1
+ providers:
+ - disableDeletion: false
+ editable: false
+ folder: Infrastructure
+ name: infra
+ options:
+ path: /var/lib/grafana/dashboards/infra
+ orgId: 1
+ type: file
+ sidecar.yaml: |
+ apiVersion: 1
+ providers:
+ - disableDeletion: false
+ editable: false
+ folder: Other
+ name: sidecar
+ options:
+ path: /tmp/dashboards
+ orgId: 1
+ type: file
+ updateIntervalSeconds: 30
+ download_dashboards.sh: "#!/usr/bin/env sh\nset -euf\nmkdir -p /var/lib/grafana/dashboards/coder\nmkdir -p /var/lib/grafana/dashboards/infra\nmkdir -p /tmp/dashboards\n\ncurl -skf \\\n--connect-timeout 60 \\\n--max-time 60 \\\n-H \"Accept: application/json\" \\\n-H \"Content-Type: application/json;charset=UTF-8\" \\\n \"https://grafana.com/api/dashboards/1860/revisions/36/download\" \\\n | sed '/-- .* --/! s/\"datasource\":.*,/\"datasource\": \"metrics\",/g' \\\n> \"/var/lib/grafana/dashboards/infra/node-exporter-full.json\"\n \ncurl -skf \\\n--connect-timeout 60 \\\n--max-time 60 \\\n-H \"Accept: application/json\" \\\n-H \"Content-Type: application/json;charset=UTF-8\" \\\n \"https://grafana.com/api/dashboards/9628/revisions/7/download\" \\\n | sed '/-- .* --/! s/\"datasource\":.*,/\"datasource\": \"metrics\",/g' \\\n> \"/var/lib/grafana/dashboards/infra/postgres-database.json\"\n"
+---
+# Source: coder-observability/charts/grafana/templates/dashboards-json-configmap.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: grafana-dashboards-infra
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ dashboard-provider: infra
+data: {}
+---
+# Source: coder-observability/charts/loki/charts/minio/templates/configmap.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: loki-storage
+ namespace: "coder-observability"
+ labels:
+ app: minio
+ chart: minio-4.0.15
+ release: coder-observability
+ heritage: Helm
+data:
+ initialize: "#!/bin/sh\nset -e ; # Have script exit in the event of a failed command.\nMC_CONFIG_DIR=\"/etc/minio/mc/\"\nMC=\"/usr/bin/mc --insecure --config-dir ${MC_CONFIG_DIR}\"\n\n# connectToMinio\n# Use a check-sleep-check loop to wait for MinIO service to be available\nconnectToMinio() {\n SCHEME=$1\n ATTEMPTS=0 ; LIMIT=29 ; # Allow 30 attempts\n set -e ; # fail if we can't read the keys.\n ACCESS=$(cat /config/rootUser) ; SECRET=$(cat /config/rootPassword) ;\n set +e ; # The connections to minio are allowed to fail.\n echo \"Connecting to MinIO server: $SCHEME://$MINIO_ENDPOINT:$MINIO_PORT\" ;\n MC_COMMAND=\"${MC} alias set myminio $SCHEME://$MINIO_ENDPOINT:$MINIO_PORT $ACCESS $SECRET\" ;\n $MC_COMMAND ;\n STATUS=$? ;\n until [ $STATUS = 0 ]\n do\n ATTEMPTS=`expr $ATTEMPTS + 1` ;\n echo \\\"Failed attempts: $ATTEMPTS\\\" ;\n if [ $ATTEMPTS -gt $LIMIT ]; then\n exit 1 ;\n fi ;\n sleep 2 ; # 1 second intervals between attempts\n $MC_COMMAND ;\n STATUS=$? ;\n done ;\n set -e ; # reset `e` as active\n return 0\n}\n\n# checkBucketExists ($bucket)\n# Check if the bucket exists, by using the exit code of `mc ls`\ncheckBucketExists() {\n BUCKET=$1\n CMD=$(${MC} ls myminio/$BUCKET > /dev/null 2>&1)\n return $?\n}\n\n# createBucket ($bucket, $policy, $purge)\n# Ensure bucket exists, purging if asked to\ncreateBucket() {\n BUCKET=$1\n POLICY=$2\n PURGE=$3\n VERSIONING=$4\n OBJECTLOCKING=$5\n\n # Purge the bucket, if set & exists\n # Since PURGE is user input, check explicitly for `true`\n if [ $PURGE = true ]; then\n if checkBucketExists $BUCKET ; then\n echo \"Purging bucket '$BUCKET'.\"\n set +e ; # don't exit if this fails\n ${MC} rm -r --force myminio/$BUCKET\n set -e ; # reset `e` as active\n else\n echo \"Bucket '$BUCKET' does not exist, skipping purge.\"\n fi\n fi\n\n# Create the bucket if it does not exist and set objectlocking if enabled (NOTE: versioning will be not changed if OBJECTLOCKING is set because it enables versioning to the Buckets created)\nif ! 
checkBucketExists $BUCKET ; then\n if [ ! -z $OBJECTLOCKING ] ; then\n if [ $OBJECTLOCKING = true ] ; then\n echo \"Creating bucket with OBJECTLOCKING '$BUCKET'\"\n ${MC} mb --with-lock myminio/$BUCKET\n elif [ $OBJECTLOCKING = false ] ; then\n echo \"Creating bucket '$BUCKET'\"\n ${MC} mb myminio/$BUCKET\n fi\n elif [ -z $OBJECTLOCKING ] ; then\n echo \"Creating bucket '$BUCKET'\"\n ${MC} mb myminio/$BUCKET\n else\n echo \"Bucket '$BUCKET' already exists.\" \n fi\n fi\n\n\n # set versioning for bucket if objectlocking is disabled or not set\n if [ -z $OBJECTLOCKING ] ; then\n if [ ! -z $VERSIONING ] ; then\n if [ $VERSIONING = true ] ; then\n echo \"Enabling versioning for '$BUCKET'\"\n ${MC} version enable myminio/$BUCKET\n elif [ $VERSIONING = false ] ; then\n echo \"Suspending versioning for '$BUCKET'\"\n ${MC} version suspend myminio/$BUCKET\n fi\n fi\n else\n echo \"Bucket '$BUCKET' versioning unchanged.\"\n fi\n\n\n # At this point, the bucket should exist, skip checking for existence\n # Set policy on the bucket\n echo \"Setting policy of bucket '$BUCKET' to '$POLICY'.\"\n ${MC} policy set $POLICY myminio/$BUCKET\n}\n\n# Try connecting to MinIO instance\nscheme=http\nconnectToMinio $scheme\n\n\n\n# Create the buckets\ncreateBucket chunks none false \ncreateBucket ruler none false \ncreateBucket admin none false "
+ add-user: |-
+ #!/bin/sh
+ set -e ; # Have script exit in the event of a failed command.
+ MC_CONFIG_DIR="/etc/minio/mc/"
+ MC="/usr/bin/mc --insecure --config-dir ${MC_CONFIG_DIR}"
+
+ # AccessKey and secretkey credentials file are added to prevent shell execution errors caused by special characters.
+ # Special characters for example : ',",<,>,{,}
+ MINIO_ACCESSKEY_SECRETKEY_TMP="/tmp/accessKey_and_secretKey_tmp"
+
+ # connectToMinio
+ # Use a check-sleep-check loop to wait for MinIO service to be available
+ connectToMinio() {
+ SCHEME=$1
+ ATTEMPTS=0 ; LIMIT=29 ; # Allow 30 attempts
+ set -e ; # fail if we can't read the keys.
+ ACCESS=$(cat /config/rootUser) ; SECRET=$(cat /config/rootPassword) ;
+ set +e ; # The connections to minio are allowed to fail.
+ echo "Connecting to MinIO server: $SCHEME://$MINIO_ENDPOINT:$MINIO_PORT" ;
+ MC_COMMAND="${MC} alias set myminio $SCHEME://$MINIO_ENDPOINT:$MINIO_PORT $ACCESS $SECRET" ;
+ $MC_COMMAND ;
+ STATUS=$? ;
+ until [ $STATUS = 0 ]
+ do
+ ATTEMPTS=`expr $ATTEMPTS + 1` ;
+ echo \"Failed attempts: $ATTEMPTS\" ;
+ if [ $ATTEMPTS -gt $LIMIT ]; then
+ exit 1 ;
+ fi ;
+ sleep 2 ; # 1 second intervals between attempts
+ $MC_COMMAND ;
+ STATUS=$? ;
+ done ;
+ set -e ; # reset `e` as active
+ return 0
+ }
+
+ # checkUserExists ()
+ # Check if the user exists, by using the exit code of `mc admin user info`
+ checkUserExists() {
+ CMD=$(${MC} admin user info myminio $(head -1 $MINIO_ACCESSKEY_SECRETKEY_TMP) > /dev/null 2>&1)
+ return $?
+ }
+
+ # createUser ($policy)
+ createUser() {
+ POLICY=$1
+ #check accessKey_and_secretKey_tmp file
+ if [[ ! -f $MINIO_ACCESSKEY_SECRETKEY_TMP ]];then
+ echo "credentials file does not exist"
+ return 1
+ fi
+ if [[ $(cat $MINIO_ACCESSKEY_SECRETKEY_TMP|wc -l) -ne 2 ]];then
+ echo "credentials file is invalid"
+ rm -f $MINIO_ACCESSKEY_SECRETKEY_TMP
+ return 1
+ fi
+ USER=$(head -1 $MINIO_ACCESSKEY_SECRETKEY_TMP)
+ # Create the user if it does not exist
+ if ! checkUserExists ; then
+ echo "Creating user '$USER'"
+ cat $MINIO_ACCESSKEY_SECRETKEY_TMP | ${MC} admin user add myminio
+ else
+ echo "User '$USER' already exists."
+ fi
+ #clean up credentials files.
+ rm -f $MINIO_ACCESSKEY_SECRETKEY_TMP
+
+ # set policy for user
+ if [ ! -z $POLICY -a $POLICY != " " ] ; then
+ echo "Adding policy '$POLICY' for '$USER'"
+ ${MC} admin policy set myminio $POLICY user=$USER
+ else
+ echo "User '$USER' has no policy attached."
+ fi
+ }
+
+ # Try connecting to MinIO instance
+ scheme=http
+ connectToMinio $scheme
+
+
+
+ # Create the users
+ echo console > $MINIO_ACCESSKEY_SECRETKEY_TMP
+ echo console123 >> $MINIO_ACCESSKEY_SECRETKEY_TMP
+ createUser consoleAdmin
+ add-policy: |-
+ #!/bin/sh
+ set -e ; # Have script exit in the event of a failed command.
+ MC_CONFIG_DIR="/etc/minio/mc/"
+ MC="/usr/bin/mc --insecure --config-dir ${MC_CONFIG_DIR}"
+
+ # connectToMinio
+ # Use a check-sleep-check loop to wait for MinIO service to be available
+ connectToMinio() {
+ SCHEME=$1
+ ATTEMPTS=0 ; LIMIT=29 ; # Allow 30 attempts
+ set -e ; # fail if we can't read the keys.
+ ACCESS=$(cat /config/rootUser) ; SECRET=$(cat /config/rootPassword) ;
+ set +e ; # The connections to minio are allowed to fail.
+ echo "Connecting to MinIO server: $SCHEME://$MINIO_ENDPOINT:$MINIO_PORT" ;
+ MC_COMMAND="${MC} alias set myminio $SCHEME://$MINIO_ENDPOINT:$MINIO_PORT $ACCESS $SECRET" ;
+ $MC_COMMAND ;
+ STATUS=$? ;
+ until [ $STATUS = 0 ]
+ do
+ ATTEMPTS=`expr $ATTEMPTS + 1` ;
+ echo \"Failed attempts: $ATTEMPTS\" ;
+ if [ $ATTEMPTS -gt $LIMIT ]; then
+ exit 1 ;
+ fi ;
+ sleep 2 ; # 1 second intervals between attempts
+ $MC_COMMAND ;
+ STATUS=$? ;
+ done ;
+ set -e ; # reset `e` as active
+ return 0
+ }
+
+ # checkPolicyExists ($policy)
+ # Check if the policy exists, by using the exit code of `mc admin policy info`
+ checkPolicyExists() {
+ POLICY=$1
+ CMD=$(${MC} admin policy info myminio $POLICY > /dev/null 2>&1)
+ return $?
+ }
+
+ # createPolicy($name, $filename)
+ createPolicy () {
+ NAME=$1
+ FILENAME=$2
+
+ # Create the name if it does not exist
+ echo "Checking policy: $NAME (in /config/$FILENAME.json)"
+ if ! checkPolicyExists $NAME ; then
+ echo "Creating policy '$NAME'"
+ else
+ echo "Policy '$NAME' already exists."
+ fi
+ ${MC} admin policy add myminio $NAME /config/$FILENAME.json
+
+ }
+
+ # Try connecting to MinIO instance
+ scheme=http
+ connectToMinio $scheme
+ custom-command: |-
+ #!/bin/sh
+ set -e ; # Have script exit in the event of a failed command.
+ MC_CONFIG_DIR="/etc/minio/mc/"
+ MC="/usr/bin/mc --insecure --config-dir ${MC_CONFIG_DIR}"
+
+ # connectToMinio ($scheme)
+ # Register the "myminio" alias with mc, retrying every 2 seconds until MinIO answers or the attempt limit is hit
+ connectToMinio() {
+ SCHEME=$1
+ ATTEMPTS=0 ; LIMIT=29 ; # Allow 30 attempts: the first try plus up to 29 retries
+ set -e ; # fail if we can't read the keys.
+ ACCESS=$(cat /config/rootUser) ; SECRET=$(cat /config/rootPassword) ;
+ set +e ; # The connections to minio are allowed to fail.
+ echo "Connecting to MinIO server: $SCHEME://$MINIO_ENDPOINT:$MINIO_PORT" ;
+ MC_COMMAND="${MC} alias set myminio $SCHEME://$MINIO_ENDPOINT:$MINIO_PORT $ACCESS $SECRET" ;
+ $MC_COMMAND ;
+ STATUS=$? ;
+ until [ $STATUS = 0 ]
+ do
+ ATTEMPTS=`expr $ATTEMPTS + 1` ;
+ echo \"Failed attempts: $ATTEMPTS\" ; # the escaped quotes are printed literally
+ if [ $ATTEMPTS -gt $LIMIT ]; then
+ exit 1 ; # give up: exits the whole script, not just this function
+ fi ;
+ sleep 2 ; # 2 second intervals between attempts
+ $MC_COMMAND ;
+ STATUS=$? ;
+ done ;
+ set -e ; # reset `e` as active
+ return 0
+ }
+
+ # runCommand ($@)
+ # Run mc with the caller's arguments and hand back its exit status
+ runCommand() {
+ ${MC} "$@"
+ return $?
+ }
+
+ # Connect to the MinIO instance over plain HTTP (connectToMinio exits the script if it never succeeds)
+ scheme=http
+ connectToMinio $scheme
+---
+# Source: coder-observability/charts/loki/templates/config.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: loki
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+data:
+ config.yaml: |2
+ auth_enabled: false
+ chunk_store_config:
+ chunk_cache_config:
+ background:
+ writeback_buffer: 500000
+ writeback_goroutines: 1
+ writeback_size_limit: 500MB
+ default_validity: 0s
+ memcached:
+ batch_size: 4
+ parallelism: 5
+ memcached_client:
+ addresses: dnssrvnoa+_memcached-client._tcp.loki-chunks-cache.coder-observability.svc
+ consistent_hash: true
+ max_idle_conns: 72
+ timeout: 2000ms
+ common:
+ compactor_address: 'http://loki-backend:3100'
+ path_prefix: /var/loki
+ replication_factor: 1
+ storage:
+ s3:
+ access_key_id: enterprise-logs
+ bucketnames: chunks
+ endpoint: loki-storage.coder-observability.svc:9000
+ insecure: true
+ s3forcepathstyle: true
+ secret_access_key: supersecret
+ frontend:
+ scheduler_address: ""
+ tail_proxy_url: ""
+ frontend_worker:
+ scheduler_address: ""
+ index_gateway:
+ mode: simple
+ limits_config:
+ max_cache_freshness_per_query: 10m
+ query_timeout: 300s
+ reject_old_samples: true
+ reject_old_samples_max_age: 168h
+ split_queries_by_interval: 15m
+ volume_enabled: true
+ memberlist:
+ join_members:
+ - loki-memberlist
+ pattern_ingester:
+ enabled: false
+ query_range:
+ align_queries_with_step: true
+ cache_results: true
+ results_cache:
+ cache:
+ background:
+ writeback_buffer: 500000
+ writeback_goroutines: 1
+ writeback_size_limit: 500MB
+ default_validity: 12h
+ memcached_client:
+ addresses: dnssrvnoa+_memcached-client._tcp.loki-results-cache.coder-observability.svc
+ consistent_hash: true
+ timeout: 500ms
+ update_interval: 1m
+ ruler:
+ alertmanager_url: http://alertmanager.coder-observability.svc
+ enable_alertmanager_v2: true
+ enable_api: true
+ remote_write:
+ clients:
+ fake:
+ headers:
+ Source: Loki
+ remote_timeout: 30s
+ url: http://prometheus.coder-observability.svc/api/v1/write
+ enabled: true
+ ring:
+ kvstore:
+ store: inmemory
+ rule_path: /rules
+ storage:
+ local:
+ directory: /rules
+ type: local
+ wal:
+ dir: /var/loki-ruler-wal
+ runtime_config:
+ file: /etc/loki/runtime-config/runtime-config.yaml
+ schema_config:
+ configs:
+ - from: "2024-04-01"
+ index:
+ period: 24h
+ prefix: index_
+ object_store: s3
+ schema: v13
+ store: tsdb
+ server:
+ grpc_listen_port: 9095
+ http_listen_port: 3100
+ http_server_read_timeout: 600s
+ http_server_write_timeout: 600s
+ storage_config:
+ boltdb_shipper:
+ index_gateway_client:
+ server_address: dns+loki-backend-headless.coder-observability.svc.cluster.local:9095
+ hedging:
+ at: 250ms
+ max_per_second: 20
+ up_to: 3
+ tsdb_shipper:
+ index_gateway_client:
+ server_address: dns+loki-backend-headless.coder-observability.svc.cluster.local:9095
+ tracing:
+ enabled: false
+---
+# Source: coder-observability/charts/loki/templates/gateway/configmap-gateway.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: loki-gateway
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: gateway
+data:
+ nginx.conf: "worker_processes 5; ## Default: 1\nerror_log /dev/stderr;\npid /tmp/nginx.pid;\nworker_rlimit_nofile 8192;\n\nevents {\n worker_connections 4096; ## Default: 1024\n}\n\nhttp {\n client_body_temp_path /tmp/client_temp;\n proxy_temp_path /tmp/proxy_temp_path;\n fastcgi_temp_path /tmp/fastcgi_temp;\n uwsgi_temp_path /tmp/uwsgi_temp;\n scgi_temp_path /tmp/scgi_temp;\n\n client_max_body_size 4M;\n\n proxy_read_timeout 600; ## 10 minutes\n proxy_send_timeout 600;\n proxy_connect_timeout 600;\n\n proxy_http_version 1.1;\n\n default_type application/octet-stream;\n log_format main '$remote_addr - $remote_user [$time_local] $status '\n '\"$request\" $body_bytes_sent \"$http_referer\" '\n '\"$http_user_agent\" \"$http_x_forwarded_for\"';\n access_log /dev/stderr main;\n\n sendfile on;\n tcp_nopush on;\n resolver kube-dns.kube-system.svc.cluster.local.;\n \n\n server {\n listen 8080;\n listen [::]:8080;\n\n location = / {\n return 200 'OK';\n auth_basic off;\n }\n\n ########################################################\n # Configure backend targets# Distributor\n location = /api/prom/push {\n proxy_pass http://loki-write.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /loki/api/v1/push {\n proxy_pass http://loki-write.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /distributor/ring {\n proxy_pass http://loki-write.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /otlp/v1/logs {\n proxy_pass http://loki-write.coder-observability.svc.cluster.local:3100$request_uri;\n }\n\n # Ingester\n location = /flush {\n proxy_pass http://loki-write.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location ^~ /ingester/ {\n proxy_pass http://loki-write.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /ingester {\n internal; # to suppress 301\n }\n\n # Ring\n location = /ring {\n proxy_pass http://loki-write.coder-observability.svc.cluster.local:3100$request_uri;\n 
}\n\n # MemberListKV\n location = /memberlist {\n proxy_pass http://loki-write.coder-observability.svc.cluster.local:3100$request_uri;\n }\n\n # Ruler\n location = /ruler/ring {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /api/prom/rules {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location ^~ /api/prom/rules/ {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /loki/api/v1/rules {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location ^~ /loki/api/v1/rules/ {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /prometheus/api/v1/alerts {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /prometheus/api/v1/rules {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n\n # Compactor\n location = /compactor/ring {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /loki/api/v1/delete {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /loki/api/v1/cache/generation_numbers {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n\n # IndexGateway\n location = /indexgateway/ring {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n\n # QueryScheduler\n location = /scheduler/ring {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n\n # Config\n location = /config {\n proxy_pass http://loki-write.coder-observability.svc.cluster.local:3100$request_uri;\n }\n\n\n # QueryFrontend, Querier\n location = /api/prom/tail {\n proxy_pass 
http://loki-read.coder-observability.svc.cluster.local:3100$request_uri;\n proxy_set_header Upgrade $http_upgrade;\n proxy_set_header Connection \"upgrade\";\n }\n location = /loki/api/v1/tail {\n proxy_pass http://loki-read.coder-observability.svc.cluster.local:3100$request_uri;\n proxy_set_header Upgrade $http_upgrade;\n proxy_set_header Connection \"upgrade\";\n }\n location ^~ /api/prom/ {\n proxy_pass http://loki-read.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /api/prom {\n internal; # to suppress 301\n }\n location ^~ /loki/api/v1/ {\n proxy_pass http://loki-read.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /loki/api/v1 {\n internal; # to suppress 301\n }\n }\n}\n"
+---
+# Source: coder-observability/charts/loki/templates/monitoring/dashboards/configmap-1.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: loki-dashboards-1
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ grafana_dashboard: "1"
+data:
+ "loki-chunks.json": |
+ {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"})","format":"time_series","intervalFactor":2,"legendFormat":"series","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Series","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}) / sum(loki_ingester_memory_streams{cluster=\"$cluster\", 
job=~\"$namespace/(loki|enterprise-logs)-write\"})","format":"time_series","intervalFactor":2,"legendFormat":"chunks","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Chunks per series","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Active Series / Chunks","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"sum(rate(loki_ingester_chunk_utilization_sum{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_utilization_count{cluster=\"$cluster\", 
job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Utilization","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"sum(rate(loki_ingester_chunk_age_seconds_sum{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) * 1e3 / sum(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", 
job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Age","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Flush Stats","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"sum(rate(loki_ingester_chunk_entries_sum{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_entries_count{cluster=\"$cluster\", 
job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Log Entries Per Chunk","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_chunk_store_index_entries_per_chunk_sum{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"Index Entries","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Index Entries Per Chunk","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Flush 
Stats","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"loki_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"} or cortex_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Queue Length","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"1xx":"#EAB839","2xx":"#7EB26D","3xx":"#6ED0E0","4xx":"#EF843C","5xx":"#E24D42","error":"#E24D42","success":"#7EB26D"},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":8,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (status) (\n label_replace(label_replace(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]),\n \"status\", \"${1}xx\", 
\"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n","format":"time_series","intervalFactor":2,"legendFormat":"{{status}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Flush Rate","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Flush Stats","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":9,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Chunks 
Flushed/Second","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":10,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (reason) (rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{reason}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Chunk Flush Reason","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":1,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":1,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Flush 
Stats","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"cards":{"cardPadding":null,"cardRound":null},"color":{"cardColor":"#b4ff00","colorScale":"sqrt","colorScheme":"interpolateSpectral","exponent":0.5,"mode":"spectrum"},"dataFormat":"tsbuckets","datasource":"$datasource","heatmap":{},"hideZeroBuckets":false,"highlightCards":true,"id":11,"legend":{"show":true},"span":12,"targets":[{"expr":"sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))","format":"heatmap","intervalFactor":2,"legendFormat":"{{le}}","refId":"A"}],"title":"Chunk Utilization","tooltip":{"show":true,"showHistogram":true},"type":"heatmap","xAxis":{"show":true},"xBucketNumber":null,"xBucketSize":null,"yAxis":{"decimals":0,"format":"percentunit","show":true,"splitFactor":null},"yBucketBound":"auto"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Utilization","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"cards":{"cardPadding":null,"cardRound":null},"color":{"cardColor":"#b4ff00","colorScale":"sqrt","colorScheme":"interpolateSpectral","exponent":0.5,"mode":"spectrum"},"dataFormat":"tsbuckets","datasource":"$datasource","heatmap":{},"hideZeroBuckets":false,"highlightCards":true,"id":12,"legend":{"show":true},"span":12,"targets":[{"expr":"sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)","format":"heatmap","intervalFactor":2,"legendFormat":"{{le}}","refId":"A"}],"title":"Chunk Size 
Bytes","tooltip":{"show":true,"showHistogram":true},"type":"heatmap","xAxis":{"show":true},"xBucketNumber":null,"xBucketSize":null,"yAxis":{"decimals":0,"format":"bytes","show":true,"splitFactor":null},"yBucketBound":"auto"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Utilization","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":13,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[1m])) by (le))","format":"time_series","intervalFactor":2,"legendFormat":"p99","legendLink":null,"step":10},{"expr":"histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[1m])) by (le))","format":"time_series","intervalFactor":2,"legendFormat":"p90","legendLink":null,"step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[1m])) by (le))","format":"time_series","intervalFactor":2,"legendFormat":"p50","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Chunk Size 
Quantiles","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Utilization","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":14,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) by (le))","format":"time_series","intervalFactor":2,"legendFormat":"p50","legendLink":null,"step":10},{"expr":"histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) by (le))","format":"time_series","intervalFactor":2,"legendFormat":"p99","legendLink":null,"step":10},{"expr":"sum(rate(loki_ingester_chunk_bounds_hours_sum{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) / sum(rate(loki_ingester_chunk_bounds_hours_count{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"avg","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Chunk Duration hours 
(end-start)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Duration","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Chunks","uid":"chunks","version":0}
+ "loki-deletion.json": |
+ {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"height":"100px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"none","id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(loki_compactor_pending_delete_requests_count{cluster=~\"$cluster\", namespace=~\"$namespace\"})","format":"time_series","instant":true,"intervalFactor":2,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Number of Pending Requests","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"dtdurations","id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"max(loki_compactor_oldest_pending_delete_request_age_seconds{cluster=~\"$cluster\", 
namespace=~\"$namespace\"})","format":"time_series","instant":true,"intervalFactor":2,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Oldest Pending Request Age","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Headlines","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(increase(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))","format":"time_series","intervalFactor":2,"legendFormat":"received","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Delete Requests Received / 
Day","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(increase(loki_compactor_delete_requests_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))","format":"time_series","intervalFactor":2,"legendFormat":"processed","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Delete Requests Processed / Day","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Churn","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(increase(loki_compactor_load_pending_requests_attempts_total{status=\"fail\", cluster=~\"$cluster\", namespace=~\"$namespace\"}[1h]))","format":"time_series","intervalFactor":2,"legendFormat":"failures","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Failures in Loading Delete Requests / Hour","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Failures","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_compactor_deleted_lines{cluster=~\"$cluster\",job=~\"$namespace/(loki|enterprise-logs)-read\"}[$__rate_interval])) by (user)","format":"time_series","intervalFactor":2,"legendFormat":"{{user}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Lines Deleted / 
Sec","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Deleted lines","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Deletion","uid":"deletion","version":0}
+ "loki-logs.json": |
+ {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":8,"iteration":1583185057230,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":0,"y":0},"hiddenSeries":false,"id":35,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(go_goroutines{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"})","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"goroutines","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":3,"y":0},"hiddenSeries":false,"id":41,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(go_gc_duration_seconds{cluster=\"$cluster\", 
namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}) by (quantile)","legendFormat":"{{quantile}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"gc duration","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":6,"y":0},"hiddenSeries":false,"id":36,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", 
container=~\"$container\"}[5m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"cpu","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":9,"y":0},"hiddenSeries":false,"id":40,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"})","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"working 
set","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":12,"y":0},"hiddenSeries":false,"id":38,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", 
pod=~\"$pod\"}[5m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"tx","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":15,"y":0},"hiddenSeries":false,"id":39,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", 
pod=~\"$pod\"}[5m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"rx","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"decbytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":18,"y":0},"hiddenSeries":false,"id":37,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"increase(kube_pod_container_status_last_terminated_reason{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[30m]) \u003e 
0","legendFormat":"{{reason}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"restarts","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":21,"y":0},"hiddenSeries":false,"id":42,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\", exported_pod=~\"$deployment.*\", exported_pod=~\"$pod\", container=~\"$container\"}[5m])) by (level)","legendFormat":"{{level}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"bad 
words","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$logs","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":24,"x":0,"y":4},"hiddenSeries":false,"id":31,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"warn","color":"#FF780A"},{"alias":"error","color":"#E02F44"},{"alias":"info","color":"#56A64B"},{"alias":"debug","color":"#3274D9"}],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=~\"$level\" |= \"$filter\" [5m])) by (level)","intervalFactor":3,"legendFormat":"{{level}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Log Rate","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":false,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}],"yaxis":{"align":false,"alignLevel":null}},{"datasource":"$logs","gridPos":{"h":19,"w":24,"x":0,"y":6},"id":29,"maxDataPoints":"","options":{"showLabels":false,"showTime":true,"sortOrder":"Descending","wrapLogMessage":true},"targets":[{"expr":"{cluster=\"$cluster\", namespace=\"$namespace\", 
pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"} | logfmt | level=~\"$level\" |= \"$filter\"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Logs","type":"logs"}],"refresh":"10s","rows":[],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"hide":0,"label":null,"name":"logs","options":[],"query":"loki","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"deployment","options":[],"query":"label_values(kube_deployment_created{cluster=\"$cluster\", namespace=\"$namespace\"}, deployment)","refresh":0,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"pod","options":[],"query":"label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\"}, 
pod)","refresh":0,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"container","options":[],"query":"label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\", pod=~\"$deployment.*\"}, container)","refresh":0,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"selected":true,"text":"","value":""},"hide":0,"includeAll":false,"label":"","multi":true,"name":"level","options":[{"selected":false,"text":"debug","value":"debug"},{"selected":false,"text":"info","value":"info"},{"selected":false,"text":"warn","value":"warn"},{"selected":false,"text":"error","value":"error"}],"query":"debug,info,warn,error","refresh":0,"type":"custom"},{"current":{"selected":false,"text":"","value":""},"label":"LogQL Filter","name":"filter","query":"","type":"textbox"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Logs","uid":"logs","version":0}
+ "loki-mixin-recording-rules.json": |
+ {"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations \u0026 Alerts","target":{"limit":100,"matchAny":false,"tags":[],"type":"dashboard"},"type":"dashboard"},{"datasource":"${datasource}","enable":false,"expr":"sum by (tenant) (changes(loki_ruler_wal_prometheus_tsdb_wal_truncations_total{tenant=~\"${tenant}\"}[$__rate_interval]))","iconColor":"red","name":"WAL Truncations","target":{"queryType":"Azure Monitor","refId":"Anno"},"titleFormat":"{{tenant}}"}]},"editable":true,"fiscalYearStartMonth":0,"gnetId":null,"graphTooltip":0,"iteration":1635347545534,"links":[],"liveNow":false,"panels":[{"datasource":"${datasource}","fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"noValue":"0","thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":1}]}},"overrides":[]},"gridPos":{"h":10,"w":2,"x":0,"y":0},"id":2,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"8.3.0-38205pre","targets":[{"datasource":"${datasource}","exemplar":false,"expr":"sum(loki_ruler_wal_appender_ready) by (pod, tenant) == 0","instant":true,"interval":"","legendFormat":"","refId":"A"}],"title":"Appenders Not 
Ready","type":"stat"},{"datasource":"${datasource}","description":"","fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":10,"w":11,"x":2,"y":0},"id":4,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"targets":[{"datasource":"${datasource}","exemplar":true,"expr":"sum(rate(loki_ruler_wal_samples_appended_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) \u003e 0","interval":"","legendFormat":"{{tenant}}","refId":"A"}],"title":"Samples Appended to WAL per Second","type":"timeseries"},{"datasource":"${datasource}","description":"Series are unique combinations of 
labels","fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":10,"w":11,"x":13,"y":0},"id":5,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"targets":[{"datasource":"${datasource}","exemplar":true,"expr":"sum(rate(loki_ruler_wal_storage_created_series_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) \u003e 0","interval":"","legendFormat":"{{tenant}}","refId":"A"}],"title":"Series Created per Second","type":"timeseries"},{"datasource":"${datasource}","description":"Difference between highest timestamp appended to WAL and highest timestamp successfully written to remote 
storage","fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":10,"w":12,"x":0,"y":10},"id":6,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"targets":[{"datasource":"${datasource}","exemplar":true,"expr":"loki_ruler_wal_prometheus_remote_storage_highest_timestamp_in_seconds{tenant=~\"${tenant}\"}\n- on (tenant)\n (\n loki_ruler_wal_prometheus_remote_storage_queue_highest_sent_timestamp_seconds{tenant=~\"${tenant}\"}\n or vector(0)\n )","interval":"","legendFormat":"{{tenant}}","refId":"A"}],"title":"Write 
Behind","type":"timeseries"},{"datasource":"${datasource}","description":"","fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":10,"w":12,"x":12,"y":10},"id":7,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"targets":[{"datasource":"${datasource}","exemplar":true,"expr":"sum(rate(loki_ruler_wal_prometheus_remote_storage_samples_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) \u003e 0","interval":"","legendFormat":"{{tenant}}","refId":"A"}],"title":"Samples Sent per Second","type":"timeseries"},{"datasource":"${datasource}","description":"\n","fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":10,"w":12,"x":0,"y":20},"id":8,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"targets":[{"datasource":"${datasource}","exemplar":true,"expr":"sum by (tenant) 
(loki_ruler_wal_disk_size{tenant=~\"${tenant}\"})","interval":"","legendFormat":"{{tenant}}","refId":"A"}],"title":"WAL Disk Size","type":"timeseries"},{"datasource":"${datasource}","description":"Some number of pending samples is expected, but if remote-write is failing this value will remain high","fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":10,"w":12,"x":12,"y":20},"id":9,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"targets":[{"datasource":"${datasource}","exemplar":true,"expr":"max(loki_ruler_wal_prometheus_remote_storage_samples_pending{tenant=~\"${tenant}\"}) by (tenant,pod) \u003e 0","interval":"","legendFormat":"{{tenant}}","refId":"A"}],"title":"Pending Samples","type":"timeseries"}],"schemaVersion":31,"style":"dark","tags":[],"templating":{"list":[{"description":null,"error":null,"hide":0,"includeAll":false,"label":"Datasource","multi":false,"name":"datasource","options":[],"query":"prometheus","queryValue":"","refresh":1,"regex":"","skipUrlSync":false,"type":"datasource"},{"allValue":null,"datasource":"${datasource}","definition":"label_values(loki_ruler_wal_samples_appended_total, tenant)","description":null,"error":null,"hide":0,"includeAll":true,"label":"Tenant","multi":true,"name":"tenant","options":[],"query":{"query":"label_values(loki_ruler_wal_samples_appended_total, 
tenant)","refId":"StandardVariableQuery"},"refresh":2,"regex":"","skipUrlSync":false,"sort":0,"type":"query"}]},"time":{"from":"now-6h","to":"now"},"timepicker":{},"timezone":"","title":"Recording Rules","uid":"2xKA_ZK7k","version":9,"weekStart":""}
+ "loki-operational.json": |
+ {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":68,"iteration":1588704280892,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"panels":[{"collapsed":false,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":17,"panels":[],"targets":[],"title":"Main","type":"row"},{"aliasColors":{"5xx":"red"},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":4,"x":0,"y":1},"hiddenSeries":false,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", 
\"([a-z]+)\")\n)","legendFormat":"{{status}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Queries/Second","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":10,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{"5xx":"red"},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":4,"x":4,"y":1},"hiddenSeries":false,"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", 
\"([a-z]+)\"))","legendFormat":"{{status}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Pushes/Second","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":10,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":4,"x":12,"y":1},"hiddenSeries":false,"id":2,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"topk(10, sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant))","legendFormat":"{{tenant}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Lines Per Tenant (top 
10)","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":4,"x":16,"y":1},"hiddenSeries":false,"id":4,"legend":{"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"topk(10, sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant)) / 1024 / 1024","legendFormat":"{{tenant}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"MBs Per Tenant (Top 
10)","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":4,"x":20,"y":1},"hiddenSeries":false,"id":24,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"increase(kube_pod_container_status_restarts_total{cluster=\"$cluster\", namespace=\"$namespace\"}[10m]) \u003e 0","hide":false,"interval":"","legendFormat":"{{container}}-{{pod}}","refId":"B"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Container 
Restarts","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":0,"y":6},"hiddenSeries":false,"id":9,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3","legendFormat":".99","refId":"A"},{"expr":"histogram_quantile(0.75, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3","legendFormat":".75","refId":"B"},{"expr":"histogram_quantile(0.5, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3","legendFormat":".5","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Push 
Latency","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":12,"y":6},"hiddenSeries":false,"id":12,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3","legendFormat":".99","refId":"A"},{"expr":"histogram_quantile(0.9, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3","legendFormat":".9","refId":"B"},{"expr":"histogram_quantile(0.5, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3","legendFormat":".5","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Distributor 
Latency","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":0,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":18,"y":6},"hiddenSeries":false,"id":71,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[5m])) by (route)","interval":"","legendFormat":"{{route}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Distributor Success 
Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"decimals":null,"format":"percentunit","label":"","logBase":1,"max":"1","min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":12,"y":11},"hiddenSeries":false,"id":13,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3","legendFormat":".99","refId":"A"},{"expr":"histogram_quantile(0.9, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3","hide":false,"legendFormat":".9","refId":"B"},{"expr":"histogram_quantile(0.5, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3","hide":false,"legendFormat":".5","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Ingester Latency 
Write","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":0,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":18,"y":11},"hiddenSeries":false,"id":72,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\", route=\"/logproto.Pusher/Push\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\"}[5m])) by (route)","interval":"","legendFormat":"{{route}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Ingester Success Rate 
Write","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"decimals":null,"format":"percentunit","label":"","logBase":1,"max":"1","min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":0,"y":16},"hiddenSeries":false,"id":10,"legend":{"alignAsTable":true,"avg":false,"current":false,"hideEmpty":true,"hideZero":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))","legendFormat":"{{route}}-.99","refId":"A"},{"expr":"histogram_quantile(0.9, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))","legendFormat":"{{route}}-.9","refId":"B"},{"expr":"histogram_quantile(0.5, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", 
route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))","legendFormat":"{{route}}-.5","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Query Latency","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":12,"y":16},"hiddenSeries":false,"id":14,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3","legendFormat":".99-{{route}}","refId":"A"},{"expr":"histogram_quantile(0.9, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", 
route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3","legendFormat":".9-{{route}}","refId":"B"},{"expr":"histogram_quantile(0.5, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3","legendFormat":".5-{{route}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Querier Latency","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":0,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":18,"y":16},"hiddenSeries":false,"id":73,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\", status_code!~\"5[0-9]{2}\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", 
job=~\"($namespace)/(loki|enterprise-logs)-read\"}[5m])) by (route)","interval":"","legendFormat":"{{route}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Querier Success Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"decimals":null,"format":"percentunit","label":"","logBase":1,"max":"1","min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":12,"y":21},"hiddenSeries":false,"id":15,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3","legendFormat":".99-{{route}}","refId":"A"},{"expr":"histogram_quantile(0.9, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 
1e3","legendFormat":".9-{{route}}","refId":"B"},{"expr":"histogram_quantile(0.5, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3","legendFormat":".5-{{route}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Ingester Latency Read","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":0,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":18,"y":21},"hiddenSeries":false,"id":74,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", 
route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[5m])) by (route)","interval":"","legendFormat":"{{route}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Ingester Success Rate Read","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"decimals":null,"format":"percentunit","label":"","logBase":1,"max":"1","min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":26},"id":110,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":0,"y":27},"hiddenSeries":false,"id":112,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"topk(10,sum by (tenant, reason) (rate(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[1m])))","interval":"","legendFormat":"{{ tenant }} - {{ reason }}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Discarded 
Lines","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"columns":[],"datasource":"$datasource","fontSize":"100%","gridPos":{"h":8,"w":12,"x":12,"y":27},"id":113,"pageSize":null,"panels":[],"showHeader":true,"sort":{"col":3,"desc":true},"styles":[{"alias":"Time","align":"auto","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"","align":"auto","colorMode":null,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"mappingType":1,"pattern":"tenant","thresholds":[],"type":"string","unit":"short"},{"alias":"","align":"auto","colorMode":null,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"mappingType":1,"pattern":"reason","thresholds":[],"type":"number","unit":"short"},{"alias":"","align":"right","colorMode":null,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"decimals":2,"pattern":"/.*/","thresholds":[],"type":"number","unit":"short"}],"targets":[{"expr":"topk(10, sum by (tenant, reason) (sum_over_time(increase(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[1m])[$__range:1m])))","format":"table","instant":true,"interval":"","legendFormat":"{{ tenant }} - {{ reason }}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Discarded Lines Per 
Interval","transform":"table","type":"table-old"}],"targets":[],"title":"Limits","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":27},"id":23,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":28},"hiddenSeries":false,"id":26,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":false,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":true,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(loki|enterprise-logs)-write.*\"}","intervalFactor":3,"legendFormat":"{{pod}}-{{container}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"CPU 
Usage","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":6,"y":28},"hiddenSeries":false,"id":27,"legend":{"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":false,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":true,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(loki|enterprise-logs)-write.*\"}","instant":false,"intervalFactor":3,"legendFormat":"{{pod}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Memory 
Usage","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$logs","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":12,"x":12,"y":28},"hiddenSeries":false,"id":31,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":false,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"{}","color":"#C4162A"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"} | logfmt | level=\"error\"[1m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Error Log Rate","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":false,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}],"yaxis":{"align":false,"alignLevel":null}},{"datasource":"$logs","gridPos":{"h":18,"w":12,"x":12,"y":32},"id":29,"options":{"showLabels":false,"showTime":false,"sortOrder":"Descending","wrapLogMessage":true},"panels":[],"targets":[{"expr":"{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"} | logfmt | 
level=\"error\"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Logs","type":"logs"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":0,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":35},"hiddenSeries":false,"id":33,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[5m])) by (route)","interval":"","intervalFactor":1,"legendFormat":"{{route}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Success 
Rate","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":6,"y":35},"hiddenSeries":false,"id":32,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_distributor_ingester_append_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (ingester)","intervalFactor":1,"legendFormat":"{{ingester}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Append Failures By 
Ingester","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":42},"hiddenSeries":false,"id":34,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Bytes 
Received/Second","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":6,"y":42},"hiddenSeries":false,"id":35,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Lines Received/Second","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Write 
Path","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":29},"id":104,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":0,"y":30},"hiddenSeries":false,"id":106,"legend":{"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"topk(10,sum by (tenant) (loki_ingester_memory_streams{cluster=\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\"}))","interval":"","legendFormat":"{{ tenant }}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Active Streams","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":12,"y":30},"hiddenSeries":false,"id":108,"legend":{"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"topk(10, sum by (tenant) 
(rate(loki_ingester_streams_created_total{cluster=\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m]) \u003e 0))","interval":"","legendFormat":"{{ tenant }}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Streams Created/Sec","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Streams","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":30},"id":94,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":0,"y":31},"hiddenSeries":false,"id":102,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"De-Dupe Ratio","yaxis":2}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m]))","interval":"","legendFormat":"Chunks","refId":"A"},{"expr":"sum(increase(loki_chunk_store_deduped_chunks_total{cluster=\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m]))/sum(increase(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m])) \u003c 1","interval":"","legendFormat":"De-Dupe Ratio","refId":"B"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Chunks 
Flushed/Sec","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"cards":{"cardPadding":null,"cardRound":null},"color":{"cardColor":"#b4ff00","colorScale":"sqrt","colorScheme":"interpolateSpectral","exponent":0.5,"mode":"spectrum"},"dataFormat":"tsbuckets","datasource":"$datasource","gridPos":{"h":8,"w":12,"x":12,"y":31},"heatmap":{},"hideZeroBuckets":false,"highlightCards":true,"id":100,"legend":{"show":true},"panels":[],"reverseYBuckets":false,"targets":[{"expr":"sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m])) by (le)","format":"heatmap","instant":false,"interval":"","legendFormat":"{{ le }}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Chunk Size Bytes","tooltip":{"show":true,"showHistogram":false},"type":"heatmap","xAxis":{"show":true},"xBucketNumber":null,"xBucketSize":null,"yAxis":{"decimals":0,"format":"bytes","logBase":1,"max":null,"min":null,"show":true,"splitFactor":null},"yBucketBound":"auto","yBucketNumber":null,"yBucketSize":null},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":7,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":39},"hiddenSeries":false,"id":96,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(reason) (rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/ingester\", 
namespace=~\"$namespace\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/ingester\", namespace=~\"$namespace\"}[$__rate_interval]))","interval":"","legendFormat":"{{ reason }}"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Chunk Flush Reason %","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":"1","min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"cards":{"cardPadding":null,"cardRound":null},"color":{"cardColor":"#b4ff00","colorScale":"sqrt","colorScheme":"interpolateSpectral","exponent":0.5,"max":null,"min":null,"mode":"spectrum"},"dataFormat":"tsbuckets","datasource":"$datasource","gridPos":{"h":9,"w":12,"x":12,"y":39},"heatmap":{},"hideZeroBuckets":true,"highlightCards":true,"id":98,"legend":{"show":true},"panels":[],"reverseYBuckets":false,"targets":[{"expr":"sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m]))","format":"heatmap","instant":false,"interval":"","legendFormat":"{{ le }}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Chunk 
Utilization","tooltip":{"show":true,"showHistogram":false},"type":"heatmap","xAxis":{"show":true},"xBucketNumber":null,"xBucketSize":null,"yAxis":{"decimals":0,"format":"percentunit","logBase":1,"max":null,"min":null,"show":true,"splitFactor":null},"yBucketBound":"auto","yBucketNumber":null,"yBucketSize":null}],"targets":[],"title":"Chunks","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":31},"id":64,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":32},"hiddenSeries":false,"id":68,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":false,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":true,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(loki|enterprise-logs)-read.*\"}","intervalFactor":3,"legendFormat":"{{pod}}-{{container}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"CPU 
Usage","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":39},"hiddenSeries":false,"id":69,"legend":{"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":false,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":true,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(loki|enterprise-logs)-read.*\"}","instant":false,"intervalFactor":3,"legendFormat":"{{pod}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Memory 
Usage","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$logs","fill":1,"fillGradient":0,"gridPos":{"h":3,"w":18,"x":12,"y":32},"hiddenSeries":false,"id":65,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":false,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"{}","color":"#F2495C"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"} | logfmt | level=\"error\"[1m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Error Log Rate","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":false,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}],"yaxis":{"align":false,"alignLevel":null}},{"datasource":"$logs","gridPos":{"h":18,"w":18,"x":12,"y":35},"id":66,"options":{"showLabels":false,"showTime":false,"sortOrder":"Descending","wrapLogMessage":true},"panels":[],"targets":[{"expr":"{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"} | logfmt | 
level=\"error\"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Logs","type":"logs"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":0,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":46},"hiddenSeries":false,"id":70,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\", status_code!~\"5[0-9]{2}\"}[1m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}[1m])) by (route)","interval":"","intervalFactor":1,"legendFormat":"{{route}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Success Rate","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Read 
Path","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":32},"id":52,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":30},"hiddenSeries":false,"id":53,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))","intervalFactor":1,"legendFormat":"{{container}}: .99-{{method}}-{{name}}","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))","hide":false,"legendFormat":"{{container}}: .9-{{method}}-{{name}}","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))","hide":false,"legendFormat":"{{container}}: .5-{{method}}-{{name}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Latency By 
Method","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":38},"hiddenSeries":false,"id":54,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_memcache_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, method, name, container)","intervalFactor":1,"legendFormat":"{{container}}: {{status_code}}-{{method}}-{{name}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Status By 
Method","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Memcached","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":33},"id":57,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":31},"hiddenSeries":false,"id":55,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".99-{{operation}}","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".9-{{operation}}","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".5-{{operation}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Latency By 
Operation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":39},"hiddenSeries":false,"id":58,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, status_code, method)","intervalFactor":1,"legendFormat":"{{status_code}}-{{operation}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Status By 
Operation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Consul","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":34},"id":43,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":9},"hiddenSeries":false,"id":41,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".9","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"MutateRows 
Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":6,"y":9},"hiddenSeries":false,"id":46,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))","interval":"","intervalFactor":1,"legendFormat":"99%","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))","interval":"","legendFormat":"90%","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))","interval":"","legendFormat":"50%","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"ReadRows 
Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":12,"y":9},"hiddenSeries":false,"id":44,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))","interval":"","intervalFactor":1,"legendFormat":"99%","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))","interval":"","legendFormat":"90%","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))","interval":"","legendFormat":"50%","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"GetTable 
Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":18,"y":9},"hiddenSeries":false,"id":45,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".9","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"ListTables 
Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":16},"hiddenSeries":false,"id":47,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (status_code)","intervalFactor":1,"legendFormat":"{{status_code}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"MutateRows 
Status","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":6,"y":16},"hiddenSeries":false,"id":50,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (status_code)","intervalFactor":1,"legendFormat":"{{status_code}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"ReadRows 
Status","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":12,"y":16},"hiddenSeries":false,"id":48,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (status_code)","intervalFactor":1,"legendFormat":"{{status_code}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"GetTable 
Status","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":18,"y":16},"hiddenSeries":false,"id":49,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (status_code)","intervalFactor":1,"legendFormat":"{{status_code}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"ListTables Status","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Big 
Table","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":35},"id":60,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":33},"hiddenSeries":false,"id":61,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".99-{{operation}}","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".9-{{operation}}","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".5-{{operation}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Latency By 
Operation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":41},"hiddenSeries":false,"id":62,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_gcs_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)","intervalFactor":1,"legendFormat":"{{status_code}}-{{operation}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Status By 
Method","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"GCS","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":36},"id":76,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"fillGradient":0,"gridPos":{"h":6,"w":6,"x":0,"y":9},"id":82,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(cortex_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Failure 
Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"fillGradient":0,"gridPos":{"h":6,"w":6,"x":6,"y":9},"id":83,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(cortex_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Consumed Capacity 
Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"fillGradient":0,"gridPos":{"h":6,"w":6,"x":12,"y":9},"id":84,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(cortex_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Throttled 
Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"fillGradient":0,"gridPos":{"h":6,"w":6,"x":18,"y":9},"id":85,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(cortex_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Dropped Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"fillGradient":0,"gridPos":{"h":6,"w":6,"x":0,"y":15},"id":86,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, 
sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))","legendFormat":".99","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))","legendFormat":".9","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))","legendFormat":".5","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Query Pages","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":6,"w":9,"x":6,"y":15},"id":87,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".99-{{operation}}","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".9-{{operation}}","refId":"B"},{"expr":"histogram_quantile(.5, 
sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".5-{{operation}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Latency By Operation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":6,"w":9,"x":15,"y":15},"id":88,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(cortex_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)","intervalFactor":1,"legendFormat":"{{status_code}}-{{operation}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Status By 
Method","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Dynamo","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":37},"id":78,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":10},"id":79,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".99-{{operation}}","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".9-{{operation}}","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".5-{{operation}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Latency By 
Operation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":18},"id":80,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_s3_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)","intervalFactor":1,"legendFormat":"{{status_code}}-{{operation}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Status By 
Method","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"S3","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":37},"id":78,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":10},"id":79,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".99-{{operation}}","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".9-{{operation}}","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".5-{{operation}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Latency By 
Operation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":18},"id":80,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_azure_blob_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)","intervalFactor":1,"legendFormat":"{{status_code}}-{{operation}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Status By Method","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Azure 
Blob","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":37},"id":114,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":10},"id":115,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".99-{{operation}}","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".9-{{operation}}","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".5-{{operation}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Latency By 
Operation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":18},"id":116,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)","intervalFactor":1,"legendFormat":"{{status_code}}-{{operation}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Status By Method","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"BoltDB Shipper","type":"row"}],"refresh":"10s","rows":[],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data 
Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"hide":0,"label":null,"name":"logs","options":[],"query":"loki","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Operational","uid":"operational","version":0}
+---
+# Source: coder-observability/charts/loki/templates/monitoring/dashboards/configmap-2.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: loki-dashboards-2
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ grafana_dashboard: "1"
+data:
+ "loki-reads-resources.json": |
+ {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", 
pod=~\"(loki|enterprise-logs)-read.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} \u003e 0)","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory 
(workingset)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory (go heap inuse)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"gridPos":{},"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) 
(container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}} - {{device}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Writes","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"gridPos":{},"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}} - {{device}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk 
Reads","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"(loki|enterprise-logs)-read.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{persistentvolumeclaim}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Space Utilization","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"loki_boltdb_shipper_query_readiness_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}","format":"time_series","intervalFactor":2,"legendFormat":"duration","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Query Readiness Duration","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Read path","titleSize":"h6","type":"row"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":8,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", 
pod=~\"(loki|enterprise-logs)-write.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":9,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} \u003e 0)","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory 
(workingset)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":10,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory (go heap inuse)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Ingester","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, 
cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Reads Resources","uid":"reads-resources","version":0}
+ "loki-reads.json": |
+ {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{"1xx":"#EAB839","2xx":"#7EB26D","3xx":"#6ED0E0","4xx":"#EF843C","5xx":"#E24D42","error":"#E24D42","success":"#7EB26D"},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", 
\"([a-z]+)\"))\n","format":"time_series","intervalFactor":2,"legendFormat":"{{status}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"QPS","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"})) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"{{ route }} 99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"})) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"{{ route }} 50th Percentile","refId":"B","step":10},{"expr":"1e3 * 
sum(job_route:loki_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"}) by (route) / sum(job_route:loki_request_duration_seconds_count:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"}) by (route) ","format":"time_series","intervalFactor":2,"legendFormat":"{{ route }} Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Read Path","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{"1xx":"#EAB839","2xx":"#7EB26D","3xx":"#6ED0E0","4xx":"#EF843C","5xx":"#E24D42","error":"#E24D42","success":"#7EB26D"},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (status) (\n 
label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n","format":"time_series","intervalFactor":2,"legendFormat":"{{status}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"QPS","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"50th 
Percentile","refId":"B","step":10},{"expr":"sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"BoltDB Shipper","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, 
namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Reads","uid":"reads","version":0}
+ "loki-retention.json": |
+ {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", 
pod=~\"(loki|enterprise-logs)-read.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} \u003e 0)","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory 
(workingset)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory (go heap inuse)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Resource Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"color":{"fixedColor":"blue","mode":"fixed"},"custom":{},"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null}]},"unit":"dateTimeFromNow"}},"fill":1,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as 
zero","options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"text":{},"textMode":"auto"},"percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"} * 1e3","format":"time_series","instant":true,"refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Last Compact and Mark Operation Success","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"stat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}","format":"time_series","intervalFactor":2,"legendFormat":"duration","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Compact and Mark Operations 
Duration","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (status)(rate(loki_boltdb_shipper_compact_tables_operation_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{success}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Compact and Mark Operations Per Status","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Compact and Mark","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":true,"steppedLine":false,"targets":[{"expr":"count by(action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{action}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Processed Tables Per Action","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":8,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":true,"steppedLine":false,"targets":[{"expr":"count by(table,action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\" , action=~\"modified|deleted\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{table}}-{{action}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Modified 
Tables","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":9,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (table)(rate(loki_boltdb_shipper_retention_marker_count_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) \u003e0","format":"time_series","intervalFactor":2,"legendFormat":"{{table}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Marks Creation Rate Per Table","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Per Table Marker","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"short","id":10,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum (increase(loki_boltdb_shipper_retention_marker_count_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[24h]))","format":"time_series","instant":true,"intervalFactor":2,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Marked Chunks (24h)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":11,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / 
sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Mark Table Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"short","id":12,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum (increase(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[24h]))","format":"time_series","instant":true,"intervalFactor":2,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Delete Chunks 
(24h)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":13,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Delete 
Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Sweeper","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":14,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"time() - (loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_time{cluster=~\"$cluster\", namespace=~\"$namespace\"} \u003e 0)","format":"time_series","intervalFactor":2,"legendFormat":"lag","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Sweeper Lag","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":15,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(loki_boltdb_shipper_retention_sweeper_marker_files_current{cluster=~\"$cluster\", namespace=~\"$namespace\"})","format":"time_series","intervalFactor":2,"legendFormat":"count","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Marks Files to Process","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":16,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (status)(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{status}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Delete Rate Per 
Status","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"datasource":"$logs","id":17,"span":12,"targets":[{"expr":"{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}","refId":"A"}],"title":"Compactor Logs","type":"logs"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Logs","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, 
namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"hide":0,"label":null,"name":"logs","options":[],"query":"loki","refresh":1,"regex":"","type":"datasource"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Retention","uid":"retention","version":0}
+ "loki-writes-resources.json": |
+ {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (loki_ingester_memory_streams{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"In-memory streams","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", 
pod=~\"(loki|enterprise-logs)-write.*\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} \u003e 0)","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory 
(workingset)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory (go heap inuse)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"gridPos":{},"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) 
(container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}} - {{device}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Writes","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"gridPos":{},"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}} - {{device}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk 
Reads","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"(loki|enterprise-logs)-write.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{persistentvolumeclaim}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Space Utilization","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Write path","titleSize":"h6","type":"row"}],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data 
Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Writes Resources","uid":"writes-resources","version":0}
+ "loki-writes.json": |
+ {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{"1xx":"#EAB839","2xx":"#7EB26D","3xx":"#6ED0E0","4xx":"#EF843C","5xx":"#E24D42","error":"#E24D42","success":"#7EB26D"},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", 
\"([a-z]+)\"))\n","format":"time_series","intervalFactor":2,"legendFormat":"{{status}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"QPS","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"1e3 * sum(job:loki_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"}) / sum(job:loki_request_duration_seconds_count:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", 
cluster=~\"$cluster\"})","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Write Path","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{"1xx":"#EAB839","2xx":"#7EB26D","3xx":"#6ED0E0","4xx":"#EF843C","5xx":"#E24D42","error":"#E24D42","success":"#7EB26D"},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", 
\"([a-z]+)\"))\n","format":"time_series","intervalFactor":2,"legendFormat":"{{status}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"QPS","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", 
operation=\"WRITE\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"BoltDB Shipper","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Writes","uid":"writes","version":0}
+---
+# Source: coder-observability/charts/loki/templates/runtime-configmap.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: loki-runtime
+  namespace: coder-observability
+  labels:
+    app.kubernetes.io/name: loki
+    app.kubernetes.io/instance: coder-observability
+    app.kubernetes.io/managed-by: Helm
+data:
+  runtime-config.yaml: |  # payload is an empty mapping: this render defines no entries
+    {}
+---
+# Source: coder-observability/charts/prometheus/charts/alertmanager/templates/configmap.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: alertmanager
+  labels:
+    app.kubernetes.io/name: alertmanager
+    app.kubernetes.io/instance: coder-observability
+    app.kubernetes.io/managed-by: Helm
+  namespace: coder-observability
+data:
+  alertmanager.yml: |  # one route -> default-receiver; that receiver declares only a name
+    global: {}
+    receivers:
+    - name: default-receiver
+    route:
+      group_interval: 5m
+      group_wait: 10s
+      receiver: default-receiver
+      repeat_interval: 3h
+    templates:
+    - /etc/alertmanager/*.tmpl
+---
+# Source: coder-observability/charts/prometheus/templates/cm.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  labels:
+    app.kubernetes.io/component: server
+    app.kubernetes.io/name: prometheus
+    app.kubernetes.io/instance: coder-observability
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/part-of: prometheus
+  name: prometheus
+  namespace: coder-observability
+data:
+  allow-snippet-annotations: "false"  # quoted: kept as the string "false", not a YAML boolean
+  alerting_rules.yml: |
+    {}
+  alerts: |
+    {}
+  prometheus.yml: |  # no scrape jobs here (scrape_configs: []); rules are loaded from /etc/config/alerts, Alertmanager pods are discovered and filtered by the keep rules
+    global:
+      evaluation_interval: 30s
+      scrape_interval: 1m
+      scrape_timeout: 10s
+    rule_files:
+    - /etc/config/alerts/*.yaml
+    scrape_configs: []
+    alerting:
+      alertmanagers:
+      - kubernetes_sd_configs:
+          - role: pod
+        tls_config:
+          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+        relabel_configs:
+        - source_labels: [__meta_kubernetes_namespace]
+          regex: coder-observability
+          action: keep
+        - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance]
+          regex: coder-observability
+          action: keep
+        - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
+          regex: alertmanager
+          action: keep
+        - source_labels: [__meta_kubernetes_pod_container_port_number]
+          regex: "9093"
+          action: keep
+  recording_rules.yml: |
+    {}
+  rules: |
+    {}
+---
+# Source: coder-observability/templates/configmap-collector.yaml
+kind: ConfigMap
+apiVersion: v1
+metadata:
+ name: collector-config
+ namespace: coder-observability
+data:
+ config.river: "\n// Discover k8s nodes\ndiscovery.kubernetes \"nodes\" {\n role = \"node\"\n}\n\n// Discover k8s pods\ndiscovery.kubernetes \"pods\" {\n role = \"pod\"\n selectors {\n role = \"pod\"\n }\n}\n\ndiscovery.relabel \"pod_logs\" {\n targets = discovery.kubernetes.pods.targets\n \n rule {\n source_labels = [\"__meta_kubernetes_namespace\"]\n target_label = \"namespace\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_name\"]\n target_label = \"pod\"\n }\n // coalesce the following labels and pick the first value; we'll use this to define the \"job\" label\n rule {\n source_labels = [\"__meta_kubernetes_pod_label_app_kubernetes_io_component\", \"app\", \"__meta_kubernetes_pod_container_name\"]\n separator = \"/\"\n target_label = \"__meta_app\"\n action = \"replace\"\n regex = \"^/*([^/]+?)(?:/.*)?$\" // split by the delimiter if it exists, we only want the first one\n replacement = \"${1}\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_namespace\", \"__meta_kubernetes_pod_label_app_kubernetes_io_name\", \"__meta_app\"]\n separator = \"/\"\n target_label = \"job\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_container_name\"]\n target_label = \"container\"\n }\n rule {\n regex = \"__meta_kubernetes_pod_label_(statefulset_kubernetes_io_pod_name|controller_revision_hash)\"\n action = \"labeldrop\"\n }\n rule {\n regex = \"pod_template_generation\"\n action = \"labeldrop\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_phase\"]\n regex = \"Pending|Succeeded|Failed|Completed\"\n action = \"drop\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_node_name\"]\n action = \"replace\"\n target_label = \"node\"\n }\n rule {\n action = \"labelmap\"\n regex = \"__meta_kubernetes_pod_annotation_prometheus_io_param_(.+)\"\n replacement = \"__param_$1\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_uid\", \"__meta_kubernetes_pod_container_name\"]\n separator = \"/\"\n action = \"replace\"\n replacement = 
\"/var/log/pods/*$1/*.log\"\n target_label = \"__path__\"\n }\n rule {\n action = \"replace\"\n source_labels = [\"__meta_kubernetes_pod_container_id\"]\n regex = \"^(\\\\w+):\\\\/\\\\/.+$\"\n replacement = \"$1\"\n target_label = \"tmp_container_runtime\"\n }\n}\n\ndiscovery.relabel \"pod_metrics\" {\n targets = discovery.kubernetes.pods.targets\n \n rule {\n source_labels = [\"__meta_kubernetes_namespace\"]\n target_label = \"namespace\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_name\"]\n target_label = \"pod\"\n }\n // coalesce the following labels and pick the first value; we'll use this to define the \"job\" label\n rule {\n source_labels = [\"__meta_kubernetes_pod_label_app_kubernetes_io_component\", \"app\", \"__meta_kubernetes_pod_container_name\"]\n separator = \"/\"\n target_label = \"__meta_app\"\n action = \"replace\"\n regex = \"^/*([^/]+?)(?:/.*)?$\" // split by the delimiter if it exists, we only want the first one\n replacement = \"${1}\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_namespace\", \"__meta_kubernetes_pod_label_app_kubernetes_io_name\", \"__meta_app\"]\n separator = \"/\"\n target_label = \"job\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_container_name\"]\n target_label = \"container\"\n }\n rule {\n regex = \"__meta_kubernetes_pod_label_(statefulset_kubernetes_io_pod_name|controller_revision_hash)\"\n action = \"labeldrop\"\n }\n rule {\n regex = \"pod_template_generation\"\n action = \"labeldrop\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_phase\"]\n regex = \"Pending|Succeeded|Failed|Completed\"\n action = \"drop\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_node_name\"]\n action = \"replace\"\n target_label = \"node\"\n }\n rule {\n action = \"labelmap\"\n regex = \"__meta_kubernetes_pod_annotation_prometheus_io_param_(.+)\"\n replacement = \"__param_$1\"\n }\n // drop ports that do not expose Prometheus metrics, but might otherwise be exposed by a container which *also*\n 
// exposes an HTTP port which exposes metrics\n rule {\n source_labels = [\"__meta_kubernetes_pod_container_port_name\"]\n regex = \"grpc|http-(memberlist|console)\"\n action = \"drop\"\n }\n // adapted from the Prometheus helm chart\n // https://github.com/prometheus-community/helm-charts/blob/862870fc3c847e32479b509e511584d5283126a3/charts/prometheus/values.yaml#L1070\n rule {\n source_labels = [\"__meta_kubernetes_pod_annotation_prometheus_io_scrape\"]\n action = \"keep\"\n regex = \"true\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_annotation_prometheus_io_scheme\"]\n action = \"replace\"\n regex = \"(https?)\"\n target_label = \"__scheme__\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_annotation_prometheus_io_path\"]\n action = \"replace\"\n target_label = \"__metrics_path__\"\n regex = \"(.+)\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_annotation_prometheus_io_port\", \"__meta_kubernetes_pod_ip\"]\n action = \"replace\"\n regex = \"(\\\\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})\"\n replacement = \"[$2]:$1\"\n target_label = \"__address__\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_annotation_prometheus_io_port\", \"__meta_kubernetes_pod_ip\"]\n action = \"replace\"\n regex = \"(\\\\d+);((([0-9]+?)(\\\\.|$)){4})\"\n replacement = \"$2:$1\"\n target_label = \"__address__\"\n }\n}\n\nlocal.file_match \"pod_logs\" {\n path_targets = discovery.relabel.pod_logs.output\n}\n\nloki.source.file \"pod_logs\" {\n targets = local.file_match.pod_logs.targets\n forward_to = [loki.process.pod_logs.receiver]\n}\n\nloki.process \"pod_logs\" {\n stage.match {\n selector = \"{tmp_container_runtime=\\\"containerd\\\"}\"\n // the cri processing stage extracts the following k/v pairs: log, stream, time, flags\n stage.cri {}\n // Set the extract flags and stream values as labels\n stage.labels {\n values = {\n flags = \"\",\n stream = \"\",\n }\n }\n }\n\n // if the label tmp_container_runtime from above is docker parse 
using docker\n stage.match {\n selector = \"{tmp_container_runtime=\\\"docker\\\"}\"\n // the docker processing stage extracts the following k/v pairs: log, stream, time\n stage.docker {}\n\n // Set the extract stream value as a label\n stage.labels {\n values = {\n stream = \"\",\n }\n }\n }\n\n // drop the temporary container runtime label as it is no longer needed\n stage.label_drop {\n values = [\"tmp_container_runtime\"]\n }\n\n // parse Coder logs and extract level & logger for efficient filtering\n stage.match {\n selector = \"{pod=~\\\"coder.*\\\"}\" // TODO: make configurable\n\n stage.multiline {\n firstline = \"^(?P\\\\d{4}-\\\\d{2}-\\\\d{2}\\\\s\\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d{3})\"\n max_wait_time = \"10s\"\n }\n\n stage.regex {\n expression = \"^(?P\\\\d{4}-\\\\d{2}-\\\\d{2}\\\\s\\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d{3})\\\\s\\\\[(?P\\\\w+)\\\\]\\\\s\\\\s(?P[^:]+):\\\\s(?P.+)\"\n }\n\n stage.timestamp {\n source = \"ts\"\n format = \"2006-01-02 15:04:05.000\"\n action_on_failure = \"fudge\" // rather have inaccurate time than drop the log line\n }\n\n stage.labels {\n values = {\n level = \"\",\n logger = \"\",\n }\n }\n }\n\n forward_to = [loki.write.loki.receiver]\n}\n\nloki.write \"loki\" {\n endpoint {\n url = \"http://loki-gateway.coder-observability.svc/loki/api/v1/push\"\n }\n}\n\nprometheus.scrape \"pods\" {\n targets = discovery.relabel.pod_metrics.output\n forward_to = [prometheus.relabel.pods.receiver]\n\n scrape_interval = \"15s\"\n scrape_timeout = \"12s\"\n}\n\n// These are metric_relabel_configs while discovery.relabel are relabel_configs.\n// See https://github.com/grafana/agent/blob/main/internal/converter/internal/prometheusconvert/prometheusconvert.go#L95-L106\nprometheus.relabel \"pods\" {\n forward_to = [prometheus.remote_write.default.receiver]\n\n // Drop kube-state-metrics' labels which clash with ours\n rule {\n source_labels = [\"__name__\", \"container\"]\n regex = \"kube_pod.+;(.+)\"\n target_label = \"container\"\n 
replacement = \"\"\n }\n rule {\n source_labels = [\"__name__\", \"pod\"]\n regex = \"kube_pod.+;(.+)\"\n target_label = \"pod\"\n replacement = \"\"\n }\n rule {\n source_labels = [\"__name__\", \"namespace\"]\n regex = \"kube_pod.+;(.+)\"\n target_label = \"namespace\"\n replacement = \"\"\n }\n rule {\n source_labels = [\"__name__\", \"exported_container\"]\n // don't replace an empty label\n regex = \"^kube_pod.+;(.+)$\"\n target_label = \"container\"\n replacement = \"$1\"\n }\n rule {\n source_labels = [\"__name__\", \"exported_pod\"]\n // don't replace an empty label\n regex = \"^kube_pod.+;(.+)$\"\n target_label = \"pod\"\n replacement = \"$1\"\n }\n rule {\n source_labels = [\"__name__\", \"exported_namespace\"]\n // don't replace an empty label\n regex = \"^kube_pod.+;(.+)$\"\n target_label = \"namespace\"\n replacement = \"$1\"\n }\n rule {\n regex = \"^(exported_.*|image_.*|container_id|id|uid)$\"\n action = \"labeldrop\"\n }\n}\n\ndiscovery.relabel \"cadvisor\" {\n targets = discovery.kubernetes.nodes.targets\n rule {\n replacement = \"/metrics/cadvisor\"\n target_label = \"__metrics_path__\"\n }\n}\n\nprometheus.scrape \"cadvisor\" {\n targets = discovery.relabel.cadvisor.output\n forward_to = [ prometheus.relabel.cadvisor.receiver ]\n scheme = \"https\"\n tls_config {\n insecure_skip_verify = true\n }\n bearer_token_file = \"/var/run/secrets/kubernetes.io/serviceaccount/token\"\n scrape_interval = \"15s\"\n scrape_timeout = \"12s\"\n}\n\nprometheus.relabel \"cadvisor\" {\n forward_to = [ prometheus.remote_write.default.receiver ]\n\n // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688\n rule {\n source_labels = [\"__name__\",\"container\"]\n separator = \"@\"\n regex = \"(container_cpu_.*|container_fs_.*|container_memory_.*)@\"\n action = \"drop\"\n }\n // Drop empty image labels, addressing https://github.com/google/cadvisor/issues/2688\n rule {\n source_labels = [\"__name__\",\"image\"]\n separator = \"@\"\n 
regex = \"(container_cpu_.*|container_fs_.*|container_memory_.*|container_network_.*)@\"\n action = \"drop\"\n }\n // Drop irrelevant series\n rule {\n source_labels = [\"container\"]\n regex = \"^POD$\"\n action = \"drop\"\n }\n // Drop unnecessary labels\n rule {\n source_labels = [\"id\"]\n target_label = \"id\"\n replacement = \"\"\n }\n rule {\n source_labels = [\"job\"]\n target_label = \"job\"\n replacement = \"\"\n }\n rule {\n source_labels = [\"name\"]\n target_label = \"name\"\n replacement = \"\"\n }\n}\n\nprometheus.remote_write \"default\" {\n endpoint {\n url =\"http://prometheus.coder-observability.svc/api/v1/write\"\n\n // drop instance label which unnecessarily adds new series when pods are restarted, since pod IPs are dynamically assigned\n // NOTE: \"__address__\" is mapped to \"instance\", so will contain :\n write_relabel_config {\n regex = \"instance\"\n action = \"labeldrop\"\n }\n }\n}"
+---
+# Source: coder-observability/templates/configmap-prometheus-alerts.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: metrics-alerts
+ namespace: coder-observability
+data:
+ coderd.yaml: "groups:\n- name: CPU Usage\n rules:\n \n - alert: CoderdCPUUsage\n expr: max by (pod) (rate(container_cpu_usage_seconds_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}[10m])) / max by(pod) (kube_pod_container_resource_limits{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`, resource=\"cpu\"}) > 0.9\n for: 10m\n annotations:\n summary: The Coder instance {{ $labels.pod }} is using high amounts of CPU, which may impact application performance.\n labels:\n severity: critical\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdcpuusage\n - alert: CoderdCPUUsage\n expr: max by (pod) (rate(container_cpu_usage_seconds_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}[10m])) / max by(pod) (kube_pod_container_resource_limits{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`, resource=\"cpu\"}) > 0.8\n for: 10m\n annotations:\n summary: The Coder instance {{ $labels.pod }} is using high amounts of CPU, which may impact application performance.\n labels:\n severity: warning\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdcpuusage\n- name: Memory Usage\n rules:\n \n - alert: CoderdMemoryUsage\n expr: max by (pod) (container_memory_working_set_bytes{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}) / max by (pod) (kube_pod_container_resource_limits{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`, resource=\"memory\"}) > 0.9\n for: 10m\n annotations:\n summary: The Coder instance {{ $labels.pod }} is using high amounts of memory, which may lead to an Out-Of-Memory (OOM) error.\n labels:\n severity: critical\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdmemoryusage\n - alert: CoderdMemoryUsage\n expr: max by (pod) (container_memory_working_set_bytes{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}) / max by (pod) (kube_pod_container_resource_limits{pod=~`coder.*`, 
pod!~`.*provisioner.*`, namespace=`coder`, resource=\"memory\"}) > 0.8\n for: 10m\n annotations:\n summary: The Coder instance {{ $labels.pod }} is using high amounts of memory, which may lead to an Out-Of-Memory (OOM) error.\n labels:\n severity: warning\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdmemoryusage\n- name: Pod Restarts\n rules:\n \n - alert: CoderdRestarts\n expr: sum by(pod) (increase(kube_pod_container_status_restarts_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}[10m])) > 3\n for: 1m\n annotations:\n summary: The Coder instance {{ $labels.pod }} has restarted multiple times in the last 10m, which may indicate a CrashLoop.\n labels:\n severity: critical\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdrestarts\n - alert: CoderdRestarts\n expr: sum by(pod) (increase(kube_pod_container_status_restarts_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}[10m])) > 1\n for: 1m\n annotations:\n summary: The Coder instance {{ $labels.pod }} has restarted multiple times in the last 10m, which may indicate a CrashLoop.\n labels:\n severity: notify\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdrestarts\n - alert: CoderdRestarts\n expr: sum by(pod) (increase(kube_pod_container_status_restarts_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}[10m])) > 2\n for: 1m\n annotations:\n summary: The Coder instance {{ $labels.pod }} has restarted multiple times in the last 10m, which may indicate a CrashLoop.\n labels:\n severity: warning\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdrestarts\n- name: Coderd Replicas\n rules:\n \n - alert: CoderdReplicas\n expr: sum(up{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}) < 1\n for: 5m\n annotations:\n summary: Number of alive coderd replicas is below the threshold = 1.\n labels:\n severity: critical\n 
runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdreplicas\n - alert: CoderdReplicas\n expr: sum(up{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}) < 3\n for: 5m\n annotations:\n summary: Number of alive coderd replicas is below the threshold = 3.\n labels:\n severity: notify\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdreplicas\n - alert: CoderdReplicas\n expr: sum(up{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}) < 2\n for: 5m\n annotations:\n summary: Number of alive coderd replicas is below the threshold = 2.\n labels:\n severity: warning\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdreplicas\n- name: Coderd Workspace Build Failures\n rules:\n \n - alert: CoderdWorkspaceBuildFailures\n expr: sum(increase(coderd_workspace_builds_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`, status=\"failed\" }[10m])) > 10\n for: 10m\n annotations:\n summary: Workspace builds have failed multiple times in the last 10m, which may indicate a broken Coder template.\n labels:\n severity: critical\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdworkspacebuildfailures\n - alert: CoderdWorkspaceBuildFailures\n expr: sum(increase(coderd_workspace_builds_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`, status=\"failed\" }[10m])) > 2\n for: 10m\n annotations:\n summary: Workspace builds have failed multiple times in the last 10m, which may indicate a broken Coder template.\n labels:\n severity: notify\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdworkspacebuildfailures\n - alert: CoderdWorkspaceBuildFailures\n expr: sum(increase(coderd_workspace_builds_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`, status=\"failed\" }[10m])) > 5\n for: 10m\n annotations:\n summary: Workspace builds have failed multiple times in 
the last 10m, which may indicate a broken Coder template.\n labels:\n severity: warning\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdworkspacebuildfailures\n- name: Coderd Ineligible Prebuilds\n rules:\n \n - alert: CoderdIneligiblePrebuilds\n expr: max by (template_name, preset_name) (coderd_prebuilt_workspaces_running - coderd_prebuilt_workspaces_eligible) > 0\n for: 10m\n annotations:\n summary: >\n {{ $value }} prebuilt workspace(s) are currently ineligible for claiming for the \"{{ $labels.template_name }}\" template and \"{{ $labels.preset_name }}\" preset.\n This usually indicates that the agent has not started correctly, or is still running its startup scripts after an extended period of time.\n labels:\n severity: notify\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdineligibleprebuilds\n- name: Coderd Unprovisioned Prebuilt Workspaces\n rules:\n \n - alert: CoderdUnprovisionedPrebuiltWorkspaces\n expr: max by (template_name, preset_name) (coderd_prebuilt_workspaces_desired - coderd_prebuilt_workspaces_running) > 0\n for: 10m\n annotations:\n summary: >\n {{ $value }} prebuilt workspace(s) have not yet been provisioned for the \"{{ $labels.template_name }}\" template and \"{{ $labels.preset_name }}\" preset.\n labels:\n severity: warning\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdunprovisionedprebuiltworkspaces "
+ provisionerd.yaml: "groups:\n- name: Provisionerd Replicas\n rules:\n \n - alert: ProvisionerdReplicas\n expr: sum(coderd_provisionerd_num_daemons{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}) < 1\n for: 5m\n annotations:\n summary: Number of alive provisionerd replicas is below the threshold = 1.\n labels:\n severity: critical\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/provisionerd#provisionerdreplicas\n - alert: ProvisionerdReplicas\n expr: sum(coderd_provisionerd_num_daemons{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}) < 3\n for: 5m\n annotations:\n summary: Number of alive provisionerd replicas is below the threshold = 3.\n labels:\n severity: notify\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/provisionerd#provisionerdreplicas\n - alert: ProvisionerdReplicas\n expr: sum(coderd_provisionerd_num_daemons{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}) < 2\n for: 5m\n annotations:\n summary: Number of alive provisionerd replicas is below the threshold = 2.\n labels:\n severity: warning\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/provisionerd#provisionerdreplicas "
+ enterprise.yaml: "groups:\n - name: Licences\n rules:\n \n - alert: CoderLicenseSeats\n expr: 'max(coderd_license_active_users) / max(coderd_license_limit_users) >=1'\n for: 1m\n annotations:\n summary: Your Coder enterprise licence usage is now at {{ $value | humanizePercentage }} capacity.\n labels:\n severity: critical\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdlicenseseats\n - alert: CoderLicenseSeats\n expr: 'max(coderd_license_active_users) / max(coderd_license_limit_users) >=0.9'\n for: 1m\n annotations:\n summary: Your Coder enterprise licence usage is now at {{ $value | humanizePercentage }} capacity.\n labels:\n severity: warning\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdlicenseseats "
+ postgres.yaml: "groups:\n- name: Notifications\n rules:\n \n - alert: PostgresNotificationQueueFillingUp\n expr: pg_pubsub_usage > 0.9\n for: 15m\n annotations:\n summary: The postgres instance {{ $labels.instance }} has a notification that is filling up, which may impact application performance.\n labels:\n severity: critical\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/postgres#postgresnotificationqueuefillingup\n - alert: PostgresNotificationQueueFillingUp\n expr: pg_pubsub_usage > 0.5\n for: 15m\n annotations:\n summary: The postgres instance {{ $labels.instance }} has a notification that is filling up, which may impact application performance.\n labels:\n severity: notify\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/postgres#postgresnotificationqueuefillingup\n - alert: PostgresNotificationQueueFillingUp\n expr: pg_pubsub_usage > 0.8\n for: 15m\n annotations:\n summary: The postgres instance {{ $labels.instance }} has a notification that is filling up, which may impact application performance.\n labels:\n severity: warning\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/postgres#postgresnotificationqueuefillingup\n\n- name: Liveness\n rules:\n \n - alert: PostgresDown\n expr: pg_up == 0\n for: 1m\n annotations:\n summary: The postgres instance {{ $labels.instance }} is down!\n labels:\n severity: critical\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/postgres#postgresdown\n\n\n- name: Connections\n rules:\n \n - alert: PostgresConnectionsRunningLow\n expr: sum by (datname, instance) (pg_stat_activity_count) > on () group_left() (pg_settings_max_connections * 0.9)\n for: 5m\n annotations:\n summary: The postgres instance {{ $labels.instance }} is running low on connections which may impact application performance.\n labels:\n severity: critical\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/postgres#postgresconnectionsrunninglow\n - 
alert: PostgresConnectionsRunningLow\n expr: sum by (datname, instance) (pg_stat_activity_count) > on () group_left() (pg_settings_max_connections * 0.5)\n for: 5m\n annotations:\n summary: The postgres instance {{ $labels.instance }} is running low on connections which may impact application performance.\n labels:\n severity: notify\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/postgres#postgresconnectionsrunninglow\n - alert: PostgresConnectionsRunningLow\n expr: sum by (datname, instance) (pg_stat_activity_count) > on () group_left() (pg_settings_max_connections * 0.8)\n for: 5m\n annotations:\n summary: The postgres instance {{ $labels.instance }} is running low on connections which may impact application performance.\n labels:\n severity: warning\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/postgres#postgresconnectionsrunninglow"
+---
+# Source: coder-observability/templates/configmap-runbooks.yaml
+kind: ConfigMap
+apiVersion: v1
+metadata:
+ name: runbooks
+ namespace: coder-observability
+ annotations:
+ checksum/config: b0c41033d0385ee3d46488f08e85bcef0d939614dcb99194e0c5913dbf0c2c33
+data:
+ coderd.md: |-
+ # Coderd Runbooks
+
+ ## CoderdCPUUsage
+
+ The CPU usage of one or more Coder pods has been close to the limit defined for
+ the deployment. This can cause slowness in the application, workspaces becoming
+ unavailable, and may lead to the application failing its liveness probes and
+ being restarted.
+
+ To resolve this issue, increase the CPU limits of the Coder deployment.
+
+ If you find this occurring frequently, you may wish to check your Coder
+ deployment against [Coder's Reference Architectures](https://coder.com/docs/v2/latest/admin/architectures).
+
+ ## CoderdMemoryUsage
+
+ The memory usage of one or more Coder pods has been close to the limit defined
+ for the deployment. When the memory usage exceeds the limit, the pod(s) will be
+ restarted by Kubernetes. This will interrupt all connections to workspaces being
+ handled by the affected pod(s).
+
+ To resolve this issue, increase the memory limits of the Coder deployment.
+
+ If you find this occurring frequently, check the memory usage over a longer
+ period of time. If it appears to be increasing monotonically, this is likely a
+ memory leak and should be considered a bug.
+
+ ## CoderdRestarts
+
+ One or more Coder pods have been restarting multiple times in the last 10
+ minutes. This may be due to a number of issues, including:
+
+ - Failure to connect to the configured database: Coder requires a reachable
+ PostgreSQL database to function. If it fails to connect, you will see an error
+ similar to the following:
+
+ ```console
+ [warn] ping postgres: retrying error="dial tcp 10.43.94.60:5432: connect: connection refused" try=3
+ ```
+
+ - Out-Of-Memory (OOM) kills due to memory usage (see [above](#coderdmemoryusage)),
+ - An unexpected bug causing the application to exit with an error.
+
+ If Coder is not restarting due to excessive memory usage, check the logs:
+
+ 1. Check the logs of the deployment for any errors,
+
+ ```console
+ kubectl -n <coder-namespace> logs deployment/coder --previous
+ ```
+
+ 2. Check any Kubernetes events related to the deployment,
+
+ ```console
+ kubectl -n <coder-namespace> events --watch
+ ```
+
+ ## CoderdReplicas
+
+ One or more Coderd replicas are down. This may cause availability problems and elevated
+ response times for user and agent API calls.
+
+ To resolve this issue, review the Coder deployment for possible `CrashLoopBackOff`
+ instances or re-adjust alarm levels based on the actual number of replicas.
+
+ ## CoderdWorkspaceBuildFailures
+
+ A few workspace build errors have been recently observed.
+
+ Review Prometheus metrics to identify failed jobs. Check the workspace build logs
+ to determine if there is a relationship with a new template version or a buggy
+ Terraform plugin.
+
+ ## CoderdLicenseSeats
+
+ Your Enterprise license is approaching or has exceeded the number of seats purchased.
+
+ Please contact your Coder sales contact, or visit https://coder.com/contact/sales.
+
+ ## CoderdIneligiblePrebuilds
+
+ Prebuilds only become eligible to be claimed by users once the workspace's agent is a) running and b) all of its startup
+ scripts have completed.
+
+ If a prebuilt workspace is not eligible, view its agent logs to diagnose the problem.
+
+ ## CoderdUnprovisionedPrebuiltWorkspaces
+
+ The number of running prebuilt workspaces is lower than the desired instances. This could be for several reasons,
+ ordered by likelihood:
+
+ ### Experiment/License
+
+ The prebuilds feature is currently gated behind an experiment *and* a premium license.
+
+ Ensure that the prebuilds experiment is enabled with `CODER_EXPERIMENTS=workspace-prebuilds`, and that you have a premium
+ license added.
+
+ ### Preset Validation Issue
+
+ Templates which have prebuilds configured will require a configured preset defined, with ALL of the required parameters
+ set in the preset. If any of these are missing, or any of the parameters - as defined - fail validation, then the prebuilds
+ subsystem will refuse to attempt a workspace build.
+
+ Consult the coderd logs for more information; look out for errors or warnings from the prebuilds subsystem.
+
+ ### Template Misconfiguration or Error
+
+ Prebuilt workspaces cannot be provisioned due to some issue at `terraform apply`-time. This could be due to misconfigured
+ cloud resources, improper authorization, or any number of other issues.
+
+ Visit the Workspaces page, change the search term to `owner:prebuilds`, and view the previously failed builds. The
+ error will likely be quite obvious.
+
+ ### Provisioner Latency
+
+ If your provisioners are overloaded and cannot process provisioner jobs quickly enough, prebuilt workspaces may be affected.
+ There is no prioritization at present for prebuilt workspace jobs.
+
+ Ensure your provisioners are appropriately resourced (i.e. you have enough instances) to handle the concurrent build demand.
+
+ ### Use of Workspace Tags
+
+ If you are using `coder_workspace_tags` ([docs](https://coder.com/docs/admin/templates/extending-templates/workspace-tags))
+ in your template, chances are you do not have any provisioners running or they are under-resourced (see **Provisioner Latency**).
+
+ Ensure your running provisioners are configured with your desired tags.
+
+ ### Reconciliation Loop Issue
+
+ The prebuilds subsystem runs a _reconciliation loop_ which monitors the state of prebuilt workspaces to ensure the desired
+ number of instances are present at all times. Workspace Prebuilds is currently a BETA feature and so there could be a bug
+ in this _reconciliation loop_, which should be reported to Coder.
+
+ Examine your coderd logs for any errors or warnings relating to prebuilds.
+ postgres.md: |
+ # Postgres Runbooks
+
+ ## PostgresNotificationQueueFillingUp
+
+ Postgres offers asynchronous notification via the `LISTEN` and `NOTIFY`
+ commands. Coder depends heavily on this async notification mechanism for routine
+ functionality.
+
+ The notification queue may fill up when a session executes `LISTEN()` and then enters a long
+ transaction, which prevents the queue from being cleaned up. To verify:
+
+ - Check active sessions with `SELECT * FROM pg_stat_activity;`,
+ - Check the database log for the PID of the session that is preventing cleanup,
+ - Kill the query: `SELECT pg_terminate_backend(<pid>);`
+
+ For more information, see the PostgreSQL documentation available here:
+
+ - [PostgreSQL documentation on `LISTEN`](https://www.postgresql.org/docs/current/sql-listen.html)
+ - [PostgreSQL documentation on `NOTIFY`](https://www.postgresql.org/docs/current/sql-notify.html)
+
+ ## PostgresDown
+
+ Postgres is not currently running, which means the Coder control plane will not be able to read or write any state.
+ Workspaces may continue to work normally but it is recommended to get Postgres back up as quickly as possible.
+
+ ## PostgresConnectionsRunningLow
+
+ PostgreSQL has a `max_connections` setting that determines the maximum number of
+ concurrent connections. Once this connection limit is reached, no new
+ connections will be possible.
+
+ To increase the maximum number of concurrent connections, update the `max_connections`
+ configuration option for your PostgreSQL instance. See the PostgreSQL
+ documentation for more details.
+
+ **Note:** You may also need to adjust `shared_buffers` after increasing
+ `max_connections`. Additionally, you may also need to adjust the kernel
+ configuration value `kernel.shmmax` in `/etc/sysctl.conf` /
+ `/etc/sysctl.conf.d`.
+
+ For more information, see:
+
+ - [PostgreSQL Documentation: Server Configuration](https://www.postgresql.org/docs/16/runtime-config-file-locations.html)
+ - [Tuning your PostgreSQL Server](https://wiki.postgresql.org/wiki/Tuning_Your_PostgreSQL_Server)
+ provisionerd.md: |
+ # Provisionerd Runbooks
+
+ ## ProvisionerdReplicas
+
+ One or more Provisioner replicas are down. Workspace builds may be queued and processed more slowly.
+
+ To resolve this issue, review the Coder deployment (Coder provisioner pods)
+ for possible `CrashLoopBackOff` instances or re-adjust alarm levels based on the actual
+ number of replicas.
+---
+# Source: coder-observability/templates/configmap-sql-exporter.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: sql-exporter-config
+ namespace: coder-observability
+data:
+ config.yaml: |-
+ global:
+ target:
+ name: postgres
+ data_source_name: 'postgresql://coder@localhost:5432/coder?sslmode=disable'
+ collectors:
+ - notify
+ collectors:
+ - collector_name: notify
+ metrics:
+ # Add a metric to show the current usage of the Postgres "pub/sub" mechanism
+ # See https://www.postgresql.org/docs/current/functions-info.html
+ - metric_name: pg_pubsub_usage
+ type: gauge
+ help: "The fraction (0–1) of the asynchronous notification queue's maximum size that is currently occupied by notifications that are waiting to be processed"
+ static_labels:
+ hostname: localhost
+ database: coder
+ values: [ usage ]
+ query: |
+ SELECT pg_notification_queue_usage() AS usage;
+---
+# Source: coder-observability/templates/dashboards/configmap-dashboards-coderd.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: dashboards-coderd
+ namespace: coder-observability
+data:
+ coderd.json: |-
+ {
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": {
+ "type": "grafana",
+ "uid": "-- Grafana --"
+ },
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "target": {
+ "limit": 100,
+ "matchAny": false,
+ "tags": [],
+ "type": "dashboard"
+ },
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 0,
+ "links": [],
+ "panels": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 1
+ }
+ ]
+ }
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Down"
+ },
+ "properties": [
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 1
+ }
+ ]
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 0,
+ "y": 0
+ },
+ "id": 10,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "value_and_name",
+ "wideLayout": false
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "count(up{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`} == 1) or vector(0)",
+ "instant": true,
+ "legendFormat": "Up",
+ "range": false,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "(count(up{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`} == 0) or vector(0)) > 0",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Down",
+ "range": false,
+ "refId": "B"
+ }
+ ],
+ "title": "Replicas",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 6,
+ "y": 0
+ },
+ "id": 18,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "One or more replicas are required to be running in order to serve the control-plane.\n\nSee [High Availability](https://coder.com/docs/v2/latest/admin/high-availability) for details on how to\nrun multiple `coderd` replicas.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "#EAB839",
+ "value": 0.9
+ },
+ {
+ "color": "red",
+ "value": 1
+ }
+ ]
+ },
+ "unit": "percentunit"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Enabled"
+ },
+ "properties": [
+ {
+ "id": "mappings",
+ "value": [
+ {
+ "options": {
+ "0": {
+ "index": 1,
+ "text": "No"
+ },
+ "1": {
+ "index": 0,
+ "text": "Yes"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ]
+ },
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ }
+ ]
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 12,
+ "y": 0
+ },
+ "id": 32,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "value_and_name",
+ "wideLayout": false
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(coderd_license_user_limit_enabled)",
+ "instant": true,
+ "legendFormat": "Enabled",
+ "range": false,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "(\n max(coderd_license_active_users) / max(coderd_license_limit_users)\n) > 0",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Usage",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Enterprise License",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 18,
+ "y": 0
+ },
+ "id": 33,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "If you would like to try Coder's [Enterprise features](https://coder.com/docs/v2/latest/enterprise), you can [request a trial license](https://coder.com/docs/v2/latest/faqs#how-do-i-add-an-enterprise-license).",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 1
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byRegexp",
+ "options": "/(Requested|Limit)/"
+ },
+ "properties": [
+ {
+ "id": "custom.lineStyle",
+ "value": {
+ "dash": [
+ 0,
+ 10
+ ],
+ "fill": "dot"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 5
+ },
+ {
+ "id": "custom.drawStyle",
+ "value": "line"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Requested"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Limit"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "orange",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 0,
+ "y": 6
+ },
+ "id": 25,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum by (pod) (rate(container_cpu_usage_seconds_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}[$__rate_interval]))",
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "max(kube_pod_container_resource_limits{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`, resource=\"cpu\"})",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Limit",
+ "range": true,
+ "refId": "C"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "max(kube_pod_container_resource_requests{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`, resource=\"cpu\"})",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Requested",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "CPU Usage Seconds",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 6,
+ "y": 6
+ },
+ "id": 26,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "The cumulative CPU used per core-second. If `coderd` was using a full CPU core, that would be represented as 1 second.\n\nRequests & limits are shown if set.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "fixedColor": "red",
+ "mode": "shades"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 1
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Requested"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Limit"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "red",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 12,
+ "y": 6
+ },
+ "id": 30,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "sum by (reason) (\n count_over_time(kube_pod_container_status_terminated_reason{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}[$__interval])\n)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "{{reason}}",
+ "range": true,
+ "refId": "C"
+ }
+ ],
+ "title": "Terminations",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 0.0001
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 2,
+ "x": 16,
+ "y": 6
+ },
+ "id": 34,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "mean"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(increase(kube_pod_container_status_restarts_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}[$__range]))",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "B"
+ }
+ ],
+ "title": "Restarts",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 18,
+ "y": 6
+ },
+ "id": 31,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "Pods can be terminated for several reasons:\n- `OOMKilled`: pod exceeded its defined memory limit or was terminated by the OS for using excessive memory (if no limit defined)\n- `Error`: usually attributable to a configuration problem\n- `Evicted`: pod has been evicted from node for overusing resources and will be rescheduled on another node if possible",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 1
+ }
+ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byRegexp",
+ "options": "/(Requested|Limit)/"
+ },
+ "properties": [
+ {
+ "id": "custom.lineStyle",
+ "value": {
+ "dash": [
+ 0,
+ 10
+ ],
+ "fill": "dot"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 5
+ },
+ {
+ "id": "custom.drawStyle",
+ "value": "line"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Requested"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Limit"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "orange",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 0,
+ "y": 12
+ },
+ "id": 29,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max by (pod) (container_memory_working_set_bytes{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`})",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "max(kube_pod_container_resource_limits{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`, resource=\"memory\"})",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Limit",
+ "range": true,
+ "refId": "C"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "max(kube_pod_container_resource_requests{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`, resource=\"memory\"})",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Requested",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "RAM Usage",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 6,
+ "y": 12
+ },
+ "id": 28,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "This shows the total memory used by each `coderd` container; it is the same metric which the [OOM killer](https://www.kernel.org/doc/gorman/html/understand/understand016.html) uses.\n\nRequests & limits are shown if set.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "orange",
+ "value": 0.1
+ },
+ {
+ "color": "red",
+ "value": 0.5
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Errors"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "short"
+ },
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 1
+ }
+ ]
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 4,
+ "x": 12,
+ "y": 12
+ },
+ "id": 16,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "mean"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "quantile(0.5, coder_pubsub_send_latency_seconds)",
+ "instant": false,
+ "legendFormat": "Send",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "quantile(0.5, coder_pubsub_receive_latency_seconds)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Receive",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Pubsub Latency (Median)",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Errors"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "short"
+ },
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 1
+ }
+ ]
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 2,
+ "x": 16,
+ "y": 12
+ },
+ "id": 22,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "mean"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "(\n sum(increase(coder_pubsub_latency_measure_errs_total[$__range]))\n / count(coder_pubsub_latency_measure_errs_total)\n) or vector(0)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Errors",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Pubsub Errors",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 18,
+ "y": 12
+ },
+ "id": 19,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "`coderd` uses Postgres for passing messages between subcomponents for coordination and signalling;\nthis is called \"pubsub\" (or publish-subscribe).\n\nWe measure the time for messages to be sent and received. Latencies higher than 500ms will likely lead to\nyour Coder deployment feeling sluggish. High latency is usually an indication that your Postgres server is under-resourced on CPU.\n\nHigh values for median should be concerning,\nwhile the 90th percentile shows the outliers.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "orange",
+ "value": 0.1
+ },
+ {
+ "color": "red",
+ "value": 0.5
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Errors"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "short"
+ },
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 1
+ }
+ ]
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 4,
+ "x": 12,
+ "y": 15
+ },
+ "id": 21,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "mean"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "quantile(0.9, coder_pubsub_send_latency_seconds)",
+ "instant": false,
+ "legendFormat": "Send",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "quantile(0.9, coder_pubsub_receive_latency_seconds)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Receive",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Pubsub Latency (P90)",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 0,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 1
+ }
+ ]
+ },
+ "unit": "reqps"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 0,
+ "y": 18
+ },
+ "id": 35,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum by(pod) (rate(coderd_api_requests_processed_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}[$__rate_interval]))",
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "API Requests",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 6,
+ "y": 18
+ },
+ "id": 36,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "This shows the number of requests per second each `coderd` replica is handling.\n\nHeavy skewing towards a single `coderd` replica indicates faulty load balancing.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ }
+ ],
+ "refresh": "30s",
+ "schemaVersion": 39,
+ "tags": [],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-12h",
+ "to": "now"
+ },
+ "timepicker": {},
+ "timezone": "browser",
+ "title": "Control Plane",
+ "uid": "coderd",
+ "version": 6,
+ "weekStart": ""
+ }
+---
+# Source: coder-observability/templates/dashboards/configmap-dashboards-prebuilds.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: dashboards-prebuilds
+ namespace: coder-observability
+data:
+ prebuilds.json: |-
+ {
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": {
+ "type": "grafana",
+ "uid": "-- Grafana --"
+ },
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 0,
+ "id": 10,
+ "links": [],
+ "panels": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "orange",
+ "index": 2,
+ "text": "Not enabled"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Enabled"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 1,
+ "text": "Not enabled"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 0,
+ "y": 0
+ },
+ "id": 15,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "text": {
+ "valueSize": 15
+ },
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.3",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "min(coderd_experiments{experiment=\"workspace-prebuilds\"})",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Experiment enabled?",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "fixedColor": "text",
+ "mode": "fixed"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 4,
+ "y": 0
+ },
+ "id": 49,
+ "interval": "30s",
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "vertical",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.3",
+ "repeatDirection": "v",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(max(coderd_prebuilt_workspaces_desired) by (template_name, preset_name)) or vector(0)",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Desired",
+ "range": false,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(max(coderd_prebuilt_workspaces_running) by (template_name, preset_name)) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Running",
+ "range": false,
+ "refId": "D"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(max(coderd_prebuilt_workspaces_eligible) by (template_name, preset_name)) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Eligible",
+ "range": false,
+ "refId": "E"
+ }
+ ],
+ "title": "Current: Global",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "fixedColor": "text",
+ "mode": "fixed"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 8,
+ "y": 0
+ },
+ "id": 48,
+ "interval": "30s",
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "vertical",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.3",
+ "repeatDirection": "v",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(max by (template_name, preset_name) (coderd_prebuilt_workspaces_created_total)) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Created",
+ "range": false,
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(max by (template_name, preset_name) (coderd_prebuilt_workspaces_failed_total)) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Failed",
+ "range": false,
+ "refId": "C"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(max by (template_name, preset_name) (coderd_prebuilt_workspaces_claimed_total)) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Claimed",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "All Time: Global",
+ "type": "stat"
+ },
+ {
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 4
+ },
+ "id": 2,
+ "panels": [],
+ "repeat": "template",
+ "repeatDirection": "h",
+ "title": "$template",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "fixedColor": "text",
+ "mode": "fixed"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 0,
+ "y": 5
+ },
+ "id": 31,
+ "interval": "30s",
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "vertical",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.3",
+ "repeat": "preset",
+ "repeatDirection": "v",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(coderd_prebuilt_workspaces_desired{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Desired",
+ "range": false,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(coderd_prebuilt_workspaces_running{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Running",
+ "range": false,
+ "refId": "D"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(coderd_prebuilt_workspaces_eligible{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Eligible",
+ "range": false,
+ "refId": "E"
+ }
+ ],
+ "title": "Current: $preset",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMax": 10,
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 18,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "smooth",
+ "lineStyle": {
+ "fill": "solid"
+ },
+ "lineWidth": 2,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "decimals": 0,
+ "fieldMinMax": false,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Desired"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "purple",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "custom.lineStyle",
+ "value": {
+ "dash": [
+ 10,
+ 10
+ ],
+ "fill": "dash"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 85
+ },
+ {
+ "id": "custom.fillBelowTo",
+ "value": "Running"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Running"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "yellow",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "custom.fillBelowTo",
+ "value": "Eligible"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Eligible"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 4,
+ "y": 5
+ },
+ "id": 5,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.4.3",
+ "repeat": "preset",
+ "repeatDirection": "v",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "max(coderd_prebuilt_workspaces_desired{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)",
+ "instant": false,
+ "interval": "",
+ "legendFormat": "Desired",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "max(coderd_prebuilt_workspaces_running{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "legendFormat": "Running",
+ "range": true,
+ "refId": "D"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "max(coderd_prebuilt_workspaces_eligible{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "legendFormat": "Eligible",
+ "range": true,
+ "refId": "E"
+ }
+ ],
+ "title": "Pool Capacity: $preset",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMax": 10,
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 13,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "smooth",
+ "lineStyle": {
+ "fill": "solid"
+ },
+ "lineWidth": 2,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "decimals": 0,
+ "fieldMinMax": false,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Failed"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "red",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Created"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "blue",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Desired"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "purple",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Running"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "yellow",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Eligible"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Claimed"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "dark-green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 12,
+ "y": 5
+ },
+ "id": 38,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.4.3",
+ "repeat": "preset",
+ "repeatDirection": "v",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "floor(max(increase(coderd_prebuilt_workspaces_created_total{template_name=~\"$template\", preset_name=~\"$preset\"}[$__rate_interval]))) or vector(0)",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "legendFormat": "Created",
+ "range": true,
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "floor(max(increase(coderd_prebuilt_workspaces_failed_total{template_name=~\"$template\", preset_name=~\"$preset\"}[$__rate_interval]))) or vector(0)",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "legendFormat": "Failed",
+ "range": true,
+ "refId": "C"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "floor(max(increase(coderd_prebuilt_workspaces_claimed_total{template_name=~\"$template\", preset_name=~\"$preset\"}[$__rate_interval]))) or vector(0)",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "legendFormat": "Claimed",
+ "range": true,
+ "refId": "F"
+ }
+ ],
+ "title": "Pool Operations: $preset",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "fixedColor": "text",
+ "mode": "fixed"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 20,
+ "y": 5
+ },
+ "id": 1,
+ "interval": "30s",
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "vertical",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.3",
+ "repeat": "preset",
+ "repeatDirection": "v",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(coderd_prebuilt_workspaces_created_total{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Created",
+ "range": false,
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(coderd_prebuilt_workspaces_failed_total{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Failed",
+ "range": false,
+ "refId": "C"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(coderd_prebuilt_workspaces_claimed_total{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Claimed",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "All Time: $preset",
+ "type": "stat"
+ }
+ ],
+ "refresh": "30s",
+ "schemaVersion": 39,
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "allValue": "",
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "definition": "label_values(coderd_prebuilt_workspaces_desired,template_name)",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Template",
+ "multi": false,
+ "name": "template",
+ "options": [],
+ "query": {
+ "qryType": 1,
+ "query": "label_values(coderd_prebuilt_workspaces_desired,template_name)",
+ "refId": "PrometheusVariableQueryEditor-VariableQuery"
+ },
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 0,
+ "type": "query"
+ },
+ {
+ "allValue": "",
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "definition": "label_values(coderd_prebuilt_workspaces_desired{template_name=~\"$template\"},preset_name)",
+ "hide": 0,
+ "includeAll": true,
+ "label": "Preset",
+ "multi": true,
+ "name": "preset",
+ "options": [],
+ "query": {
+ "qryType": 1,
+ "query": "label_values(coderd_prebuilt_workspaces_desired{template_name=~\"$template\"},preset_name)",
+ "refId": "PrometheusVariableQueryEditor-VariableQuery"
+ },
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 0,
+ "type": "query"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-12h",
+ "to": "now"
+ },
+ "timepicker": {},
+ "timezone": "browser",
+ "title": "Prebuilds",
+ "uid": "cej6jysyme22oa",
+ "version": 13,
+ "weekStart": ""
+ }
+---
+# Source: coder-observability/templates/dashboards/configmap-dashboards-provisionerd.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: dashboards-provisionerd
+ namespace: coder-observability
+data:
+ provisionerd.json: |-
+ {
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": {
+ "type": "grafana",
+ "uid": "-- Grafana --"
+ },
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "target": {
+ "limit": 100,
+ "matchAny": false,
+ "tags": [],
+ "type": "dashboard"
+ },
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 0,
+ "links": [],
+ "panels": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 1
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 0,
+ "y": 0
+ },
+ "id": 17,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "value_and_name",
+ "wideLayout": false
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(coderd_provisionerd_num_daemons{pod=~`coder.*`, pod!~`.*provisioner.*`})",
+ "instant": true,
+ "legendFormat": "Built-in",
+ "range": false,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(coderd_provisionerd_num_daemons{pod=~`coder-provisioner.*`, namespace=`coder`})",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "External",
+ "range": false,
+ "refId": "B"
+ }
+ ],
+ "title": "Provisioners",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 6,
+ "y": 0
+ },
+ "id": 20,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "Provisioners are responsible for building workspaces.\n\n`coderd` runs built-in provisioners by default. Control this with the `CODER_PROVISIONER_DAEMONS` environment variable or `--provisioner-daemons` flag.\n\nYou can also consider [External Provisioners](https://coder.com/docs/v2/latest/admin/provisioners). Running both built-in and external provisioners is perfectly valid,\nalthough dedicated (external) provisioners will generally give the best build performance.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 1
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 12,
+ "y": 0
+ },
+ "id": 21,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "(sum(coderd_provisionerd_jobs_current) > 0) or vector(0)",
+ "instant": false,
+ "legendFormat": "Current",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(coderd_provisionerd_num_daemons)",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Capacity",
+ "range": false,
+ "refId": "B"
+ }
+ ],
+ "title": "Builds",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 18,
+ "y": 0
+ },
+ "id": 22,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "The maximum number of simultaneous builds is equivalent to the number of `provisionerd` daemons running.\n\nThe \"Capacity\" value shows how many simultaneous builds are possible.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "fieldMinMax": false,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 0,
+ "y": 7
+ },
+ "id": 23,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "histogram_quantile(0.5, sum by(le) (rate(coderd_provisionerd_job_timings_seconds_bucket[$__range])))",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Median",
+ "range": false,
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "histogram_quantile(0.9, sum by(le) (rate(coderd_provisionerd_job_timings_seconds_bucket[$__range])))",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "90th Percentile",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Build Times",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 6,
+ "y": 7
+ },
+ "id": 24,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "This shows the median and 90th percentile workspace build times.\n\nLong build times can impede developers' productivity while they wait for workspaces to start or be created.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "normal"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "decimals": 0,
+ "fieldMinMax": false,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "failed"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "orange",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "displayName",
+ "value": "Failure"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "success"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "displayName",
+ "value": "Success"
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 12,
+ "y": 7
+ },
+ "id": 25,
+ "interval": "1h",
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum by (status) (increase(coderd_provisionerd_job_timings_seconds_count[$__interval]))",
+ "hide": false,
+ "instant": false,
+ "interval": "1h",
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Build Count Per Hour",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 18,
+ "y": 7
+ },
+ "id": 26,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "_NOTE: this will not show the current hour._",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "fieldMinMax": false,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byRegexp",
+ "options": "/(Limit|Requested)/"
+ },
+ "properties": [
+ {
+ "id": "custom.drawStyle",
+ "value": "line"
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 5
+ },
+ {
+ "id": "custom.lineStyle",
+ "value": {
+ "dash": [
+ 0,
+ 10
+ ],
+ "fill": "dot"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Limit"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "orange",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Requested"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 0,
+ "y": 14
+ },
+ "id": 28,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum by (pod) (rate(container_cpu_usage_seconds_total{pod=~`coder-provisioner.*`, namespace=`coder`}[$__rate_interval]))",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(kube_pod_container_resource_limits{pod=~`coder-provisioner.*`, namespace=`coder`, resource=\"cpu\"})",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Limit",
+ "range": true,
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(kube_pod_container_resource_requests{pod=~`coder-provisioner.*`, namespace=`coder`, resource=\"cpu\"})",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Requested",
+ "range": true,
+ "refId": "C"
+ }
+ ],
+ "title": "CPU Usage Seconds",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 6,
+ "y": 14
+ },
+ "id": 30,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "The CPU time consumed per second of wall-clock time, measured in core-seconds. If a process were using one full CPU core, that would be represented as 1 second.\n\nRequests & limits are shown if set.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "fieldMinMax": false,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ }
+ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byRegexp",
+ "options": "/(Limit|Requested)/"
+ },
+ "properties": [
+ {
+ "id": "custom.drawStyle",
+ "value": "line"
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 5
+ },
+ {
+ "id": "custom.lineStyle",
+ "value": {
+ "dash": [
+ 0,
+ 10
+ ],
+ "fill": "dot"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Limit"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "orange",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Requested"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 12,
+ "y": 14
+ },
+ "id": 29,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max by (pod) (container_memory_working_set_bytes{pod=~`coder-provisioner.*`, namespace=`coder`})",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(kube_pod_container_resource_limits{pod=~`coder-provisioner.*`, namespace=`coder`, resource=\"memory\"})",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Limit",
+ "range": true,
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(kube_pod_container_resource_requests{pod=~`coder-provisioner.*`, namespace=`coder`, resource=\"memory\"})",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Requested",
+ "range": true,
+ "refId": "C"
+ }
+ ],
+ "title": "RAM Usage",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 18,
+ "y": 14
+ },
+ "id": 31,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "This shows the total memory used by each container; it is the same metric which the [OOM killer](https://www.kernel.org/doc/gorman/html/understand/understand016.html) uses.\n\nRequests & limits are shown if set.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "loki",
+ "uid": "loki"
+ },
+ "gridPos": {
+ "h": 18,
+ "w": 18,
+ "x": 0,
+ "y": 21
+ },
+ "id": 27,
+ "options": {
+ "dedupStrategy": "exact",
+ "enableLogDetails": true,
+ "prettifyLogMessage": false,
+ "showCommonLabels": false,
+ "showLabels": false,
+ "showTime": true,
+ "sortOrder": "Descending",
+ "wrapLogMessage": false
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "loki",
+ "uid": "loki"
+ },
+ "editorMode": "code",
+ "expr": "{namespace=~`(coder|coder)`, logger=~\"(.*runner|terraform|provisioner.*)\"}",
+ "queryType": "range",
+ "refId": "A"
+ }
+ ],
+ "title": "Logs",
+ "type": "logs"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 18,
+ "y": 21
+ },
+ "id": 32,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "This panel shows all logs across built-in and [external provisioners](https://coder.com/docs/v2/latest/admin/provisioners).",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ }
+ ],
+ "refresh": "30s",
+ "schemaVersion": 39,
+ "tags": [],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-12h",
+ "to": "now"
+ },
+ "timepicker": {},
+ "timezone": "browser",
+ "title": "Provisioners",
+ "uid": "provisionerd",
+ "version": 10,
+ "weekStart": ""
+ }
+---
+# Source: coder-observability/templates/dashboards/configmap-dashboards-status.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: dashboards-status
+ namespace: coder-observability
+data:
+ status.json: |-
+ {
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": {
+ "type": "grafana",
+ "uid": "-- Grafana --"
+ },
+ "enable": false,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "target": {
+ "limit": 100,
+ "matchAny": false,
+ "tags": [],
+ "type": "dashboard"
+ },
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 0,
+ "links": [],
+ "panels": [
+ {
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 9,
+ "title": "Application",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 1
+ }
+ ]
+ }
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Down"
+ },
+ "properties": [
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 1
+ }
+ ]
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 0,
+ "y": 1
+ },
+ "id": 10,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "value_and_name",
+ "wideLayout": false
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "count(up{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`} == 1) or vector(0) > 0",
+ "instant": true,
+ "legendFormat": "Up",
+ "range": false,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "count(up{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`} == 0) or vector(0) > 0",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Down",
+ "range": false,
+ "refId": "B"
+ }
+ ],
+ "title": "Coder Replicas",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 1
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 4,
+ "y": 1
+ },
+ "id": 16,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "value_and_name",
+ "wideLayout": false
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(coderd_provisionerd_num_daemons{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`})",
+ "instant": true,
+ "legendFormat": "Built-in",
+ "range": false,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(coderd_provisionerd_num_daemons{pod=~`coder-provisioner.*`, namespace=`coder`})",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "External",
+ "range": false,
+ "refId": "B"
+ }
+ ],
+ "title": "Provisioners",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ }
+ },
+ "mappings": []
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "failed"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "orange",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "displayName",
+ "value": "Failed"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "success"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "displayName",
+ "value": "Success"
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 8,
+ "y": 1
+ },
+ "id": 17,
+ "options": {
+ "displayLabels": [
+ "name",
+ "value"
+ ],
+ "legend": {
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true,
+ "values": [
+ "percent"
+ ]
+ },
+ "pieType": "pie",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "round(sum by (status) (increase(coderd_provisionerd_job_timings_seconds_count{pod!=``}[$__range])))",
+ "instant": true,
+ "legendFormat": "{{status}}",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Workspace Builds",
+ "type": "piechart"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 1
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 12,
+ "y": 1
+ },
+ "id": 18,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "count(kube_pod_status_ready{condition=\"true\", namespace=`coder-workspaces`} == 1)\nor\ncount(coderd_api_workspace_latest_build{status=\"running\"})\nor\nvector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Running Workspaces",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "decimals": 0,
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Down"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Up"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byRegexp",
+ "options": "/.*RAM/"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 16,
+ "y": 1
+ },
+ "id": 15,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(\n max_over_time(\n rate(container_cpu_usage_seconds_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}[1h:1m])\n [$__range:]\n )\n)",
+ "instant": true,
+ "legendFormat": "Control Plane CPU",
+ "range": false,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(\n max_over_time(\n rate(container_cpu_usage_seconds_total{pod=~`coder-provisioner.*`, namespace=`coder`}[1h:1m])\n [$__range:]\n )\n)",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Provisioner CPU",
+ "range": false,
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(\n max_over_time(\n container_memory_working_set_bytes{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}\n [$__range:]\n )\n)",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Control Plane RAM",
+ "range": false,
+ "refId": "C"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(\n max_over_time(\n container_memory_working_set_bytes{pod=~`coder-provisioner.*`, namespace=`coder`}\n [$__range:]\n )\n)",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Provisioner RAM",
+ "range": false,
+ "refId": "D"
+ }
+ ],
+ "title": "Resource Usage High Watermark (Cumulative)",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Down"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Up"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 20,
+ "y": 1
+ },
+ "id": 19,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "min(pg_up) or vector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Postgres",
+ "type": "stat"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 8
+ },
+ "id": 3,
+ "panels": [],
+ "title": "Observability Tools",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Down"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Up"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 0,
+ "y": 9
+ },
+ "id": 1,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "min(up{job=\"coder-observability/prometheus/server\"}) or vector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Prometheus",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Down"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Up"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 4,
+ "y": 9
+ },
+ "id": 4,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "min(up{job=\"coder-observability/loki/write\"}) or vector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Loki Write Path",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Down"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Up"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 8,
+ "y": 9
+ },
+ "id": 5,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "min(up{job=\"coder-observability/loki/read\"}) or vector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Loki Read Path",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Down"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Up"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 12,
+ "y": 9
+ },
+ "id": 6,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "min(up{job=\"coder-observability/loki/backend\", container=\"loki\"}) or vector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Loki Backend",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Down"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Up"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 16,
+ "y": 9
+ },
+ "id": 7,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "min(up{job=\"coder-observability/loki/canary\"}) or vector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Loki Canary",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Down"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Up"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 20,
+ "y": 9
+ },
+ "id": 8,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "min(up{job=\"coder-observability/grafana-agent/grafana-agent\"}) or vector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Grafana Agent",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Unhealthy"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Healthy"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 0,
+ "y": 14
+ },
+ "id": 12,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "min(prometheus_config_last_reload_successful{job=\"coder-observability/prometheus/server\"}) or vector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Prometheus Config",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Unhealthy"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Healthy"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 4,
+ "y": 14
+ },
+ "id": 14,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "min(loki_runtime_config_last_reload_successful) or vector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Loki Config",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "0": {
+ "color": "red",
+ "index": 1,
+ "text": "Unhealthy"
+ },
+ "1": {
+ "color": "green",
+ "index": 0,
+ "text": "Healthy"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "orange",
+ "index": 2,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "orange",
+ "index": 3,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 4,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 8,
+ "y": 14
+ },
+ "id": 13,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "min(agent_config_last_load_successful{job=\"coder-observability/grafana-agent/grafana-agent\"}) or vector(0)",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Grafana Agent Config",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 0.8
+ }
+ ]
+ },
+ "unit": "percentunit"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Retention Limit"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "red",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Write-Ahead Log"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "purple",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Storage"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#f9f9fb",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 12,
+ "y": 14
+ },
+ "id": 11,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "(\n prometheus_tsdb_wal_storage_size_bytes{job=\"coder-observability/prometheus/server\"} +\n prometheus_tsdb_storage_blocks_bytes{job=\"coder-observability/prometheus/server\"} +\n prometheus_tsdb_symbol_table_size_bytes{job=\"coder-observability/prometheus/server\"}\n)\n/\nprometheus_tsdb_retention_limit_bytes{job=\"coder-observability/prometheus/server\"}",
+ "instant": false,
+ "legendFormat": "Retention limit used",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Prometheus Storage",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ }
+ ]
+ },
+ "unit": "none"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 16,
+ "y": 14
+ },
+ "id": 20,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "text": {
+ "titleSize": 20,
+ "valueSize": 35
+ },
+ "textMode": "auto",
+ "wideLayout": false
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(kube_pod_container_resource_requests{namespace=\"coder-observability\", resource=\"cpu\"})",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Requested",
+ "range": false,
+ "refId": "C"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(\n max_over_time(\n rate(container_cpu_usage_seconds_total{namespace=\"coder-observability\"}[$__rate_interval])\n [$__range:]\n )\n)",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "High Watermark",
+ "range": false,
+ "refId": "D"
+ }
+ ],
+ "title": "CPU",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ }
+ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 20,
+ "y": 14
+ },
+ "id": 21,
+ "options": {
+ "colorMode": "none",
+ "graphMode": "area",
+ "justifyMode": "center",
+ "orientation": "vertical",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showPercentChange": false,
+ "text": {
+ "titleSize": 20,
+ "valueSize": 35
+ },
+ "textMode": "value_and_name",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(kube_pod_container_resource_requests{namespace=\"coder-observability\", resource=\"memory\"})",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Requested",
+ "range": false,
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(\n max_over_time(container_memory_working_set_bytes{namespace=\"coder-observability\"}[$__range])\n)",
+ "instant": true,
+ "legendFormat": "High Watermark",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "RAM",
+ "type": "stat"
+ }
+ ],
+ "refresh": "30s",
+ "schemaVersion": 39,
+ "tags": [],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-24h",
+ "to": "now"
+ },
+ "timepicker": {},
+ "timezone": "browser",
+ "title": "Status",
+ "uid": "coder-status",
+ "version": 1,
+ "weekStart": ""
+ }
+---
+# Source: coder-observability/templates/dashboards/configmap-dashboards-workspace_detail.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: dashboards-workspace-detail
+ namespace: coder-observability
+data:
+ workspaces-detail.json: |-
+ {
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": {
+ "type": "grafana",
+ "uid": "-- Grafana --"
+ },
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 0,
+ "links": [],
+ "panels": [
+ {
+ "datasource": {
+ "type": "loki",
+ "uid": "loki"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 1.2,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 28,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "**HINT**: use the dropdowns above to filter by specific workspace(s).",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "blue",
+ "value": null
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "CPUs Requested"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "none"
+ },
+ {
+ "id": "decimals",
+ "value": 2
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "RAM Requested"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "PVC Capacity"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 20,
+ "x": 0,
+ "y": 1.2
+ },
+ "id": 29,
+ "options": {
+ "colorMode": "none",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "vertical",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "/.*/",
+ "values": false
+ },
+ "showPercentChange": false,
+ "text": {
+ "titleSize": 20,
+ "valueSize": 40
+ },
+ "textMode": "value_and_name",
+ "wideLayout": false
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "group by (template_name) (coderd_agents_up{workspace_name=~\"$workspace_name\"})",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Template Name",
+ "range": false,
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "group by (template_version) (coderd_agents_up{workspace_name=~\"$workspace_name\"})",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Template Version",
+ "range": false,
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "group by (username) (coderd_agents_up{workspace_name=~\"$workspace_name\"})",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Owner",
+ "range": false,
+ "refId": "C"
+ }
+ ],
+ "title": "Details",
+ "transformations": [
+ {
+ "id": "concatenate",
+ "options": {}
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true,
+ "Value #A": true,
+ "Value #B": true,
+ "Value #C": true,
+ "Value #D": true
+ },
+ "includeByName": {},
+ "indexByName": {
+ "CPUs Requested": 7,
+ "PVC Capacity": 9,
+ "RAM Requested": 8,
+ "Time": 0,
+ "Value #A": 5,
+ "Value #B": 3,
+ "Value #C": 6,
+ "template_name": 2,
+ "template_version": 4,
+ "username": 1
+ },
+ "renameByName": {
+ "Value #C": "",
+ "lifecycle_state": "Agent State",
+ "template_name": "Template",
+ "template_version": "Template Version",
+ "username": "Owner"
+ }
+ }
+ }
+ ],
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 8,
+ "w": 4,
+ "x": 20,
+ "y": 1.2
+ },
+ "id": 38,
+ "links": [
+ {
+ "title": "Provisioners Dashboard",
+ "url": "/d/provisionerd/provisioners?${__url_time_range}"
+ }
+ ],
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "Essential information about the selected workspace.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "blue",
+ "value": null
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "CPUs Requested"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "none"
+ },
+ {
+ "id": "decimals",
+ "value": 2
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "RAM Requested"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "PVC Capacity"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 20,
+ "x": 0,
+ "y": 5.2
+ },
+ "id": 36,
+ "options": {
+ "reduceOptions": {
+ "values": false,
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "/.*/"
+ },
+ "orientation": "vertical",
+ "textMode": "value_and_name",
+ "wideLayout": false,
+ "colorMode": "none",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "showPercentChange": false,
+ "text": {
+ "titleSize": 20,
+ "valueSize": 40
+ }
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(kube_pod_container_resource_requests{pod=~\".*$workspace_name.*\", namespace=`coder-workspaces`, resource=\"cpu\"})",
+ "format": "time_series",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "CPUs Requested",
+ "range": false,
+ "refId": "D"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(kube_pod_container_resource_requests{pod=~\".*$workspace_name.*\", namespace=`coder-workspaces`, resource=\"memory\"})",
+ "format": "time_series",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "RAM Requested",
+ "range": false,
+ "refId": "E"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum(\n kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~\".*$workspace_name.*\",namespace=`coder-workspaces`}\n * on(persistentvolumeclaim) group_right\n group by (persistentvolumeclaim, persistentvolume) (\n label_replace(\n kube_persistentvolume_claim_ref,\n \"persistentvolumeclaim\",\n \"$1\",\n \"name\",\n \"(.+)\"\n )\n )\n * on (persistentvolume)\n kube_persistentvolume_capacity_bytes\n)",
+ "format": "time_series",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "PVC Capacity",
+ "range": false,
+ "refId": "F"
+ }
+ ],
+ "title": "Resources",
+ "transformations": [
+ {
+ "id": "concatenate",
+ "options": {}
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true,
+ "Value #A": true,
+ "Value #B": true,
+ "Value #C": true,
+ "Value #D": true
+ },
+ "includeByName": {},
+ "indexByName": {
+ "CPUs Requested": 7,
+ "PVC Capacity": 9,
+ "RAM Requested": 8,
+ "Time": 0,
+ "Value #A": 5,
+ "Value #B": 3,
+ "Value #C": 6,
+ "template_name": 2,
+ "template_version": 4,
+ "username": 1
+ },
+ "renameByName": {
+ "Value #C": "",
+ "lifecycle_state": "Agent State",
+ "template_name": "Template",
+ "template_version": "Template Version",
+ "username": "Owner"
+ }
+ }
+ }
+ ],
+ "type": "stat",
+ "description": ""
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "mappings": [
+ {
+ "options": {
+ "created": {
+ "color": "light-blue",
+ "index": 1,
+ "text": "Created"
+ },
+ "off": {
+ "color": "text",
+ "index": 8,
+ "text": "Off"
+ },
+ "ready": {
+ "color": "green",
+ "index": 0,
+ "text": "Ready"
+ },
+ "shutdown_error": {
+ "color": "red",
+ "index": 7,
+ "text": "Shutdown Error"
+ },
+ "shutdown_timeout": {
+ "color": "purple",
+ "index": 6,
+ "text": "Shutdown Timeout"
+ },
+ "shutting_down": {
+ "color": "light-purple",
+ "index": 5,
+ "text": "Shutting Down"
+ },
+ "start_error": {
+ "color": "red",
+ "index": 4,
+ "text": "Start Error"
+ },
+ "start_timeout": {
+ "color": "orange",
+ "index": 3,
+ "text": "Start Timeout"
+ },
+ "starting": {
+ "color": "super-light-green",
+ "index": 2,
+ "text": "Starting"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "match": "empty",
+ "result": {
+ "color": "text",
+ "index": 9,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ },
+ {
+ "options": {
+ "match": "null",
+ "result": {
+ "color": "text",
+ "index": 10,
+ "text": "Unknown"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "text",
+ "value": null
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 0,
+ "y": 9.2
+ },
+ "id": 35,
+ "options": {
+ "colorMode": "background",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "horizontal",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "/^lifecycle_state$/",
+ "values": false
+ },
+ "showPercentChange": false,
+ "text": {
+ "valueSize": 50
+ },
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max by (lifecycle_state) (coderd_agents_connections{workspace_name=~\"$workspace_name\"})",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "D"
+ }
+ ],
+ "title": "Agent Lifecycle State",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "mappings": [
+ {
+ "options": {
+ "-1": {
+ "color": "light-orange",
+ "index": 0,
+ "text": "Not completed yet"
+ }
+ },
+ "type": "value"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "#EAB839",
+ "value": 60
+ },
+ {
+ "color": "red",
+ "value": 120
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 3,
+ "x": 4,
+ "y": 9.2
+ },
+ "id": 33,
+ "options": {
+ "colorMode": "background",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "horizontal",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "/^Value$/",
+ "values": false
+ },
+ "showPercentChange": false,
+ "text": {
+ "valueSize": 50
+ },
+ "textMode": "value",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(coderd_agentstats_startup_script_seconds{workspace_name=~\"$workspace_name\"}) or vector(-1)",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "C"
+ }
+ ],
+ "title": "Agent Startup Script Execution Time",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 3,
+ "x": 7,
+ "y": 9.2
+ },
+ "id": 39,
+ "options": {
+ "colorMode": "background",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "horizontal",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "/.*/",
+ "values": false
+ },
+ "showPercentChange": false,
+ "text": {
+ "titleSize": 20,
+ "valueSize": 50
+ },
+ "textMode": "value_and_name",
+ "wideLayout": false
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max by (app) (\n label_replace(\n {workspace_name=~\"$workspace_name\", __name__=~\"coderd_agentstats_session_count_.*\"},\n \"app\",\n \"$1\",\n \"__name__\",\n \"coderd_agentstats_session_count_(.*)\"\n )\n)>0",
+ "format": "time_series",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "{{app}}",
+ "range": false,
+ "refId": "C"
+ }
+ ],
+ "title": "App Session Counts",
+ "transformations": [
+ {
+ "id": "concatenate",
+ "options": {}
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true
+ },
+ "includeByName": {},
+ "indexByName": {},
+ "renameByName": {}
+ }
+ }
+ ],
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byRegexp",
+ "options": "/.*Bytes/"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 10,
+ "x": 10,
+ "y": 9.2
+ },
+ "id": 34,
+ "options": {
+ "colorMode": "none",
+ "graphMode": "none",
+ "justifyMode": "center",
+ "orientation": "vertical",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "/.*/",
+ "values": false
+ },
+ "showPercentChange": false,
+ "text": {
+ "titleSize": 20,
+ "valueSize": 50
+ },
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(coderd_agents_connection_latencies_seconds{workspace_name=~\"$workspace_name\"})",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Connection Latency",
+ "range": false,
+ "refId": "C"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(sum by (pod) (sum_over_time(coderd_agentstats_rx_bytes{workspace_name=~\"$workspace_name\"}[$__range])))",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Received Bytes",
+ "range": false,
+ "refId": "rx"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "max(sum by (pod) (sum_over_time(coderd_agentstats_tx_bytes{workspace_name=~\"$workspace_name\"}[$__range])))",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Transmitted Bytes",
+ "range": false,
+ "refId": "tx"
+ }
+ ],
+ "title": "Networking",
+ "transformations": [
+ {
+ "id": "merge",
+ "options": {}
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true
+ },
+ "includeByName": {},
+ "indexByName": {},
+ "renameByName": {
+ "Value #A": "Received Bytes",
+ "Value #B": "Transmitted Bytes",
+ "Value #C": "Connection Latency",
+ "Value #rx": "Received Bytes",
+ "Value #tx": "Transmitted Bytes"
+ }
+ }
+ }
+ ],
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 20,
+ "y": 9.2
+ },
+ "id": 40,
+ "links": [
+ {
+ "title": "Provisioners Dashboard",
+ "url": "/d/provisionerd/provisioners?${__url_time_range}"
+ }
+ ],
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "Essential information about this workspace's agent.\n\nRead more about the agent [here](https://coder.com/docs/v2/latest/about/architecture#agents).",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "custom": {
+ "align": "auto",
+ "cellOptions": {
+ "type": "auto"
+ },
+ "filterable": true,
+ "inspect": false
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "status"
+ },
+ "properties": [
+ {
+ "id": "custom.cellOptions",
+ "value": {
+ "type": "color-text"
+ }
+ },
+ {
+ "id": "mappings",
+ "value": [
+ {
+ "options": {
+ "failed": {
+ "color": "orange",
+ "index": 1,
+ "text": "Failure"
+ },
+ "success": {
+ "color": "green",
+ "index": 0,
+ "text": "Success"
+ }
+ },
+ "type": "value"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Workspace Transition"
+ },
+ "properties": [
+ {
+ "id": "custom.cellOptions",
+ "value": {
+ "type": "color-text"
+ }
+ },
+ {
+ "id": "mappings",
+ "value": [
+ {
+ "options": {
+ "DESTROY": {
+ "color": "red",
+ "index": 0
+ },
+ "START": {
+ "color": "blue",
+ "index": 1
+ },
+ "STOP": {
+ "color": "purple",
+ "index": 2
+ }
+ },
+ "type": "value"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 20,
+ "x": 0,
+ "y": 15.2
+ },
+ "id": 6,
+ "interval": "",
+ "options": {
+ "cellHeight": "sm",
+ "footer": {
+ "countRows": false,
+ "enablePagination": true,
+ "fields": [],
+ "reducer": [
+ "sum"
+ ],
+ "show": false
+ },
+ "showHeader": true,
+ "sortBy": [
+ {
+ "desc": true,
+ "displayName": "Time"
+ }
+ ]
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum by (workspace_name, workspace_owner, status, template_name, template_version, workspace_transition) (\n # Since new series are created and are initially set to a value of 1, we cannot use \"increase\" (because an increase from nothing to 1 does not yield 1).\n # So we compare the current series to an interval ago to see if we have any new series and then sum the series we find. \n ((\n coderd_workspace_builds_total{workspace_name=~\"$workspace_name\"} - \n coderd_workspace_builds_total{workspace_name=~\"$workspace_name\"} offset $__interval\n ) >= 0) \n or coderd_workspace_builds_total{workspace_name=~\"$workspace_name\"}\n) > 0",
+ "format": "table",
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Build Log",
+ "transformations": [
+ {
+ "disabled": true,
+ "id": "groupBy",
+ "options": {
+ "fields": {
+ "Count": {
+ "aggregations": [
+ "sum"
+ ],
+ "operation": "aggregate"
+ },
+ "Status": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "Template Name": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "Template Version": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "Total": {
+ "aggregations": [
+ "sum"
+ ],
+ "operation": "aggregate"
+ },
+ "Value": {
+ "aggregations": [
+ "sum"
+ ],
+ "operation": "aggregate"
+ },
+ "Workspace Name": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "Workspace Owner": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "Workspace Transition": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "status": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "template_name": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "template_version": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "workspace_name": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "workspace_owner": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "workspace_transition": {
+ "aggregations": [],
+ "operation": "groupby"
+ }
+ }
+ }
+ },
+ {
+ "id": "sortBy",
+ "options": {
+ "fields": {},
+ "sort": [
+ {
+ "desc": true,
+ "field": "Value"
+ }
+ ]
+ }
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": false
+ },
+ "includeByName": {},
+ "indexByName": {},
+ "renameByName": {
+ "Value": "Count",
+ "Value (sum)": "Total",
+ "status": "Status",
+ "template_name": "Template Name",
+ "template_version": "Template Version",
+ "workspace_name": "Workspace Name",
+ "workspace_owner": "Workspace Owner",
+ "workspace_transition": "Workspace Transition"
+ }
+ }
+ }
+ ],
+ "type": "table"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 20,
+ "y": 15.2
+ },
+ "id": 37,
+ "links": [
+ {
+ "title": "Provisioners Dashboard",
+ "url": "/d/provisionerd/provisioners?${__url_time_range}"
+ }
+ ],
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "This table shows a reverse-chronological log of all workspace builds.\n\nThe \"Count\" field shows the count of events which occurred within a minute, grouped by all columns.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "loki",
+ "uid": "loki"
+ },
+ "gridPos": {
+ "h": 10,
+ "w": 20,
+ "x": 0,
+ "y": 22.2
+ },
+ "id": 7,
+ "options": {
+ "dedupStrategy": "exact",
+ "enableLogDetails": true,
+ "prettifyLogMessage": false,
+ "showCommonLabels": false,
+ "showLabels": false,
+ "showTime": true,
+ "sortOrder": "Descending",
+ "wrapLogMessage": false
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "loki",
+ "uid": "loki"
+ },
+ "editorMode": "code",
+ "expr": "{namespace=~`(coder|coder)`, logger=~\"(.*runner|terraform|provisioner.*)\"} |~ \"$workspace_name\" | line_format `{{ printf \"[\\033[35m\" }}{{.pod}}{{ printf \"\\033[0m]\\t\" }}{{ __line__ }}`",
+ "hide": false,
+ "queryType": "range",
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "loki",
+ "uid": "loki"
+ },
+ "editorMode": "code",
+ "expr": "{namespace=`coder-workspaces`, pod=~\".*($workspace_name).*\"} | line_format `{{ printf \"[\\033[32m\" }}{{.pod}}{{ printf \"\\033[0m]\\t\" }}{{ __line__ }}`",
+ "hide": false,
+ "queryType": "range",
+ "refId": "B"
+ }
+ ],
+ "title": "Logs",
+ "type": "logs"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 10,
+ "w": 4,
+ "x": 20,
+ "y": 22.2
+ },
+ "id": 24,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "The logs to the left come from both provisioners and workspaces.\n\nProvisioner logs matching the name filter are highlighted in magenta, while\nworkspace logs matching the name filter are highlighted in green.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ }
+ ],
+ "refresh": "30s",
+ "schemaVersion": 39,
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "allValue": "",
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "definition": "label_values(coderd_agents_up,workspace_name)",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Workspace Name Filter",
+ "multi": false,
+ "name": "workspace_name",
+ "options": [],
+ "query": {
+ "qryType": 1,
+ "query": "label_values(coderd_agents_up,workspace_name)",
+ "refId": "PrometheusVariableQueryEditor-VariableQuery"
+ },
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "type": "query"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-12h",
+ "to": "now"
+ },
+ "timepicker": {},
+ "timezone": "browser",
+ "title": "Workspace Detail",
+ "uid": "workspace-detail",
+ "version": 9,
+ "weekStart": ""
+ }
+---
+# Source: coder-observability/templates/dashboards/configmap-dashboards-workspaces.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: dashboards-workspaces
+ namespace: coder-observability
+data:
+ workspaces.json: |-
+ {
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": {
+ "type": "grafana",
+ "uid": "-- Grafana --"
+ },
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 0,
+ "links": [],
+ "panels": [
+ {
+ "datasource": {
+ "type": "loki",
+ "uid": "loki"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 1.2,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 28,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "**HINT**: use the dropdowns above to filter by specific workspaces and/or templates.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 1.2
+ },
+ "id": 31,
+ "title": "Resources",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 1,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 10,
+ "x": 0,
+ "y": 2.2
+ },
+ "id": 33,
+ "options": {
+ "legend": {
+ "calcs": [
+ "mean",
+ "stdDev",
+ "min",
+ "max",
+ "lastNotNull"
+ ],
+ "displayMode": "table",
+ "placement": "bottom",
+ "showLegend": true,
+ "sortBy": "Max",
+ "sortDesc": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "sum by (pod) (rate(container_cpu_usage_seconds_total{namespace=`coder-workspaces`}[$__rate_interval]))",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "CPU Usage",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 1,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 10,
+ "x": 10,
+ "y": 2.2
+ },
+ "id": 37,
+ "options": {
+ "legend": {
+ "calcs": [
+ "mean",
+ "stdDev",
+ "min",
+ "max",
+ "lastNotNull"
+ ],
+ "displayMode": "table",
+ "placement": "bottom",
+ "showLegend": true,
+ "sortBy": "Max",
+ "sortDesc": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "max by (pod) (container_memory_working_set_bytes{namespace=`coder-workspaces`})",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "RAM Usage",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 8,
+ "w": 4,
+ "x": 20,
+ "y": 2.2
+ },
+ "id": 36,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "The cumulative CPU used per core-second. If a workspace was using a full CPU core, that would be represented as 1 second.\n\nSee the Kubernetes [documentation](https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource/#cpu-units) for more details.\n\nThe total memory used by each workspace container is represented; it is the same metric which the [OOM killer](https://www.kernel.org/doc/gorman/html/understand/understand016.html) uses.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 1,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "none"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 10,
+ "x": 0,
+ "y": 10.2
+ },
+ "id": 38,
+ "options": {
+ "legend": {
+ "calcs": [
+ "sum"
+ ],
+ "displayMode": "table",
+ "placement": "bottom",
+ "showLegend": true,
+ "sortBy": "Max",
+ "sortDesc": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "sum by (pod) (\n round(increase(kube_pod_container_status_restarts_total{namespace=`coder-workspaces`}[$__interval]))\n) > 0",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Pod Restarts",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 1,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "none"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 10,
+ "x": 10,
+ "y": 10.2
+ },
+ "id": 39,
+ "options": {
+ "legend": {
+ "calcs": [
+ "sum"
+ ],
+ "displayMode": "table",
+ "placement": "bottom",
+ "showLegend": true,
+ "sortBy": "Max",
+ "sortDesc": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "sum by (pod, reason) (\n count_over_time(kube_pod_container_status_terminated_reason{namespace=`coder-workspaces`}[$__interval])\n)",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "{{pod}}:{{reason}}",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Terminations",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 8,
+ "w": 4,
+ "x": 20,
+ "y": 10.2
+ },
+ "id": 40,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "Pods can be terminated for several reasons:\n- `OOMKilled`: pod exceeded its defined memory limit or was terminated by the OS for using excessive memory (if no limit is defined)\n- `Error`: usually attributable to a configuration problem\n- `Evicted`: pod has been evicted from node for overusing resources and will be rescheduled on another node if possible\n\nPod restarts are not necessarily problematic, but they are worth noting.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 18.2
+ },
+ "id": 30,
+ "panels": [],
+ "title": "Builds",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 1,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "normal"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "DESTROY"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "red",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "STOP"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "purple",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "START"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "blue",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 10,
+ "x": 0,
+ "y": 19.2
+ },
+ "id": 2,
+ "interval": "5m",
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "sum by (workspace_transition) (\n (\n # Since new series are created and are initially set to a value of 1, we cannot use \"increase\" (because an increase from nothing to 1 does not yield 1).\n # So we compare the current series to an interval ago to see if we have any new series and then sum the series we find. \n (\n coderd_workspace_builds_total{status=\"success\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} - \n coderd_workspace_builds_total{status=\"success\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} offset $__interval\n ) >= 0) \n or coderd_workspace_builds_total{status=\"success\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"}\n) > 0",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "B"
+ }
+ ],
+ "title": "Successful Builds by State",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 100,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "normal"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "DESTROY"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "red",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "STOP"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "purple",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "START"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "blue",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 10,
+ "x": 10,
+ "y": 19.2
+ },
+ "id": 1,
+ "interval": "5m",
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "expr": "sum by (workspace_transition) (\n (\n # Since new series are created and are initially set to a value of 1, we cannot use \"increase\" (because an increase from nothing to 1 does not yield 1).\n # So we compare the current series to an interval ago to see if we have any new series and then sum the series we find. \n (\n coderd_workspace_builds_total{status=\"failed\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} - \n coderd_workspace_builds_total{status=\"failed\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} offset $__interval\n ) >= 0) \n or coderd_workspace_builds_total{status=\"failed\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"}\n) > 0",
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Unsuccessful Builds by State",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 8,
+ "w": 4,
+ "x": 20,
+ "y": 19.2
+ },
+ "id": 34,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "Workspaces \"transition\" between `STOP`, `START`, and `DESTROY` states.\n\nWorkspaces transition between states when a \"build\" is initiated, which is an execution of `terraform` against the chosen template.\n\nUse the \"Build Count\" table to identify workspace owners who may be struggling with template builds, in order to proactively reach out to them with assistance.\n\nConsult the [Template documentation](https://coder.com/docs/v2/latest/templates) for more information.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "custom": {
+ "align": "auto",
+ "cellOptions": {
+ "type": "auto"
+ },
+ "filterable": true,
+ "inspect": false
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "status"
+ },
+ "properties": [
+ {
+ "id": "custom.cellOptions",
+ "value": {
+ "type": "color-text"
+ }
+ },
+ {
+ "id": "mappings",
+ "value": [
+ {
+ "options": {
+ "failed": {
+ "color": "orange",
+ "index": 1,
+ "text": "Failure"
+ },
+ "success": {
+ "color": "green",
+ "index": 0,
+ "text": "Success"
+ }
+ },
+ "type": "value"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Workspace Transition"
+ },
+ "properties": [
+ {
+ "id": "custom.cellOptions",
+ "value": {
+ "type": "color-text"
+ }
+ },
+ {
+ "id": "mappings",
+ "value": [
+ {
+ "options": {
+ "DESTROY": {
+ "color": "red",
+ "index": 0
+ },
+ "START": {
+ "color": "blue",
+ "index": 1
+ },
+ "STOP": {
+ "color": "purple",
+ "index": 2
+ }
+ },
+ "type": "value"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 10,
+ "w": 20,
+ "x": 0,
+ "y": 27.2
+ },
+ "id": 6,
+ "interval": "",
+ "options": {
+ "cellHeight": "sm",
+ "footer": {
+ "countRows": false,
+ "enablePagination": true,
+ "fields": [],
+ "reducer": [
+ "sum"
+ ],
+ "show": false
+ },
+ "showHeader": true,
+ "sortBy": [
+ {
+ "desc": true,
+ "displayName": "Time"
+ }
+ ]
+ },
+ "pluginVersion": "10.4.0",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "sum by (workspace_name, workspace_owner, status, template_name, template_version, workspace_transition) (\n # Since new series are created and are initially set to a value of 1, we cannot use \"increase\" (because an increase from nothing to 1 does not yield 1).\n # So we compare the current series to an interval ago to see if we have any new series and then sum the series we find. \n ((\n coderd_workspace_builds_total{workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} - \n coderd_workspace_builds_total{workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} offset $__interval\n ) >= 0) \n or coderd_workspace_builds_total{workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"}\n) > 0",
+ "format": "table",
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Build Log",
+ "transformations": [
+ {
+ "disabled": true,
+ "id": "groupBy",
+ "options": {
+ "fields": {
+ "Count": {
+ "aggregations": [
+ "sum"
+ ],
+ "operation": "aggregate"
+ },
+ "Status": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "Template Name": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "Template Version": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "Total": {
+ "aggregations": [
+ "sum"
+ ],
+ "operation": "aggregate"
+ },
+ "Value": {
+ "aggregations": [
+ "sum"
+ ],
+ "operation": "aggregate"
+ },
+ "Workspace Name": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "Workspace Owner": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "Workspace Transition": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "status": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "template_name": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "template_version": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "workspace_name": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "workspace_owner": {
+ "aggregations": [],
+ "operation": "groupby"
+ },
+ "workspace_transition": {
+ "aggregations": [],
+ "operation": "groupby"
+ }
+ }
+ }
+ },
+ {
+ "id": "sortBy",
+ "options": {
+ "fields": {},
+ "sort": [
+ {
+ "desc": true,
+ "field": "Value"
+ }
+ ]
+ }
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": false
+ },
+ "includeByName": {},
+ "indexByName": {},
+ "renameByName": {
+ "Value": "Count",
+ "Value (sum)": "Total",
+ "status": "Status",
+ "template_name": "Template Name",
+ "template_version": "Template Version",
+ "workspace_name": "Workspace Name",
+ "workspace_owner": "Workspace Owner",
+ "workspace_transition": "Workspace Transition"
+ }
+ }
+ }
+ ],
+ "type": "table"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 10,
+ "w": 4,
+ "x": 20,
+ "y": 27.2
+ },
+ "id": 29,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "This table shows a reverse-chronological log of all workspace builds.\n\nThe \"Count\" field shows the count of events which occurred within a minute, grouped by all columns.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ }
+ },
+ "mappings": [],
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 5,
+ "x": 0,
+ "y": 37.2
+ },
+ "id": 8,
+ "interval": "1h",
+ "options": {
+ "displayLabels": [
+ "name"
+ ],
+ "legend": {
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true,
+ "values": [
+ "percent"
+ ]
+ },
+ "pieType": "pie",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "count by (workspace_owner) (coderd_workspace_latest_build_status{template_name=~\"$template_name\"})",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Workspace by User",
+ "type": "piechart"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ }
+ },
+ "mappings": [],
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 5,
+ "x": 5,
+ "y": 37.2
+ },
+ "id": 9,
+ "interval": "1h",
+ "options": {
+ "displayLabels": [
+ "name"
+ ],
+ "legend": {
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true,
+ "values": [
+ "percent"
+ ]
+ },
+ "pieType": "pie",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "count by (workspace_owner, template_name) (coderd_workspace_latest_build_status{template_name=~\"$template_name\"})",
+ "instant": true,
+ "legendFormat": "{{workspace_owner}}:{{template_name}}",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Workspace by User/Template",
+ "type": "piechart"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ }
+ },
+ "mappings": [],
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 5,
+ "x": 10,
+ "y": 37.2
+ },
+ "id": 4,
+ "interval": "1h",
+ "options": {
+ "displayLabels": [
+ "name"
+ ],
+ "legend": {
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true,
+ "values": [
+ "percent"
+ ]
+ },
+ "pieType": "pie",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "count by (template_name) (coderd_workspace_latest_build_status{template_name=~\"$template_name\"})",
+ "instant": true,
+ "legendFormat": "__auto",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Template Usage",
+ "type": "piechart"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ }
+ },
+ "mappings": [],
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 5,
+ "x": 15,
+ "y": 37.2
+ },
+ "id": 5,
+ "interval": "1h",
+ "options": {
+ "displayLabels": [],
+ "legend": {
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true,
+ "values": [
+ "percent"
+ ]
+ },
+ "pieType": "pie",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "count by (template_name, template_version) (coderd_workspace_latest_build_status{template_name=~\"$template_name\"})",
+ "instant": true,
+ "legendFormat": "{{template_name}}:{{template_version}}",
+ "range": false,
+ "refId": "A"
+ }
+ ],
+ "title": "Template Version Usage",
+ "type": "piechart"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 20,
+ "y": 37.2
+ },
+ "id": 24,
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "These charts show the distribution of workspaces and templates.\n\nUse these charts to identify which users have outdated templates, and which templates are the most/least popular in your organisation.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 44.2
+ },
+ "id": 32,
+ "panels": [],
+ "title": "Logs",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "type": "loki",
+ "uid": "loki"
+ },
+ "gridPos": {
+ "h": 10,
+ "w": 20,
+ "x": 0,
+ "y": 45.2
+ },
+ "id": 7,
+ "options": {
+ "dedupStrategy": "exact",
+ "enableLogDetails": true,
+ "prettifyLogMessage": false,
+ "showCommonLabels": false,
+ "showLabels": false,
+ "showTime": false,
+ "sortOrder": "Descending",
+ "wrapLogMessage": true
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "loki",
+ "uid": "loki"
+ },
+ "editorMode": "code",
+ "expr": "{namespace=~`(coder|coder)`, logger=~\"(.*runner|terraform|provisioner.*)\"} |~ \"$workspace_name\" or \"$template_name\"",
+ "queryType": "range",
+ "refId": "A"
+ }
+ ],
+ "title": "Logs",
+ "type": "logs"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "description": "",
+ "gridPos": {
+ "h": 10,
+ "w": 4,
+ "x": 20,
+ "y": 45.2
+ },
+ "id": 22,
+ "links": [
+ {
+ "title": "Provisioners Dashboard",
+ "url": "/d/provisionerd/provisioners?${__url_time_range}"
+ }
+ ],
+ "options": {
+ "code": {
+ "language": "plaintext",
+ "showLineNumbers": false,
+ "showMiniMap": false
+ },
+ "content": "These are the logs produced by the [Provisioners](/d/provisionerd/provisioners?${__url_time_range}).\n\nUse the dropdowns at the top to filter the logs down to a specific workspace and/or template.",
+ "mode": "markdown"
+ },
+ "pluginVersion": "10.4.0",
+ "transparent": true,
+ "type": "text"
+ }
+ ],
+ "refresh": "30s",
+ "schemaVersion": 39,
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "allValue": "",
+ "current": {
+ "selected": true,
+ "text": [
+ "All"
+ ],
+ "value": [
+ "$__all"
+ ]
+ },
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "definition": "label_values(coderd_workspace_builds_total,workspace_name)",
+ "hide": 0,
+ "includeAll": true,
+ "label": "Workspace Name Filter",
+ "multi": true,
+ "name": "workspace_name",
+ "options": [],
+ "query": {
+ "qryType": 1,
+ "query": "label_values(coderd_workspace_builds_total,workspace_name)",
+ "refId": "PrometheusVariableQueryEditor-VariableQuery"
+ },
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "type": "query"
+ },
+ {
+ "allValue": "",
+ "current": {
+ "selected": true,
+ "text": [
+ "All"
+ ],
+ "value": [
+ "$__all"
+ ]
+ },
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "definition": "label_values(coderd_workspace_builds_total,template_name)",
+ "hide": 0,
+ "includeAll": true,
+ "label": "Template Name Filter",
+ "multi": true,
+ "name": "template_name",
+ "options": [],
+ "query": {
+ "qryType": 1,
+ "query": "label_values(coderd_workspace_builds_total,template_name)",
+ "refId": "PrometheusVariableQueryEditor-VariableQuery"
+ },
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "type": "query"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-12h",
+ "to": "now"
+ },
+ "timepicker": {},
+ "timezone": "browser",
+ "title": "Workspaces",
+ "uid": "workspaces",
+ "version": 2,
+ "weekStart": ""
+ }
+---
+# Source: coder-observability/charts/grafana/templates/pvc.yaml
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+ name: grafana
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ finalizers:
+ - kubernetes.io/pvc-protection
+spec:
+ accessModes:
+ - "ReadWriteOnce"
+ resources:
+ requests:
+ storage: "10Gi"
+---
+# Source: coder-observability/charts/grafana-agent/templates/rbac.yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ name: grafana-agent
+ labels:
+ app.kubernetes.io/name: grafana-agent
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+rules:
+ # Rules which allow discovery.kubernetes to function.
+ - apiGroups:
+ - ""
+ - "discovery.k8s.io"
+ - "networking.k8s.io"
+ resources:
+ - endpoints
+ - endpointslices
+ - ingresses
+ - nodes
+ - nodes/proxy
+ - nodes/metrics
+ - pods
+ - services
+ verbs:
+ - get
+ - list
+ - watch
+ # Rules which allow loki.source.kubernetes and loki.source.podlogs to work.
+ - apiGroups:
+ - ""
+ resources:
+ - pods
+ - pods/log
+ - namespaces
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - "monitoring.grafana.com"
+ resources:
+ - podlogs
+ verbs:
+ - get
+ - list
+ - watch
+ # Rules which allow mimir.rules.kubernetes to work.
+ - apiGroups: ["monitoring.coreos.com"]
+ resources:
+ - prometheusrules
+ verbs:
+ - get
+ - list
+ - watch
+ - nonResourceURLs:
+ - /metrics
+ verbs:
+ - get
+ # Rules for prometheus.kubernetes.*
+ - apiGroups: ["monitoring.coreos.com"]
+ resources:
+ - podmonitors
+ - servicemonitors
+ - probes
+ verbs:
+ - get
+ - list
+ - watch
+ # Rules which allow eventhandler to work.
+ - apiGroups:
+ - ""
+ resources:
+ - events
+ verbs:
+ - get
+ - list
+ - watch
+ # needed for remote.kubernetes.*
+ - apiGroups: [""]
+ resources:
+ - "configmaps"
+ - "secrets"
+ verbs:
+ - get
+ - list
+ - watch
+ # needed for otelcol.processor.k8sattributes
+ - apiGroups: ["apps"]
+ resources: ["replicasets"]
+ verbs: ["get", "list", "watch"]
+ - apiGroups: ["extensions"]
+ resources: ["replicasets"]
+ verbs: ["get", "list", "watch"]
+---
+# Source: coder-observability/charts/grafana/templates/clusterrole.yaml
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+ labels:
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ annotations:
+ prometheus.io/scrape: "true"
+ name: grafana-clusterrole
+rules: []
+---
+# Source: coder-observability/charts/loki/templates/backend/clusterrole.yaml
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ name: loki-clusterrole
+rules:
+ - apiGroups: [""] # "" indicates the core API group
+ resources: ["configmaps", "secrets"]
+ verbs: ["get", "watch", "list"]
+---
+# Source: coder-observability/charts/prometheus/charts/kube-state-metrics/templates/role.yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ labels:
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: metrics
+ app.kubernetes.io/part-of: kube-state-metrics
+ app.kubernetes.io/name: kube-state-metrics
+ app.kubernetes.io/instance: coder-observability
+ name: kube-state-metrics
+rules:
+ - apiGroups: ["certificates.k8s.io"]
+ resources:
+ - certificatesigningrequests
+ verbs: ["list", "watch"]
+ - apiGroups: [""]
+ resources:
+ - configmaps
+ verbs: ["list", "watch"]
+ - apiGroups: ["batch"]
+ resources:
+ - cronjobs
+ verbs: ["list", "watch"]
+ - apiGroups: ["extensions", "apps"]
+ resources:
+ - daemonsets
+ verbs: ["list", "watch"]
+ - apiGroups: ["extensions", "apps"]
+ resources:
+ - deployments
+ verbs: ["list", "watch"]
+ - apiGroups: [""]
+ resources:
+ - endpoints
+ verbs: ["list", "watch"]
+ - apiGroups: ["autoscaling"]
+ resources:
+ - horizontalpodautoscalers
+ verbs: ["list", "watch"]
+ - apiGroups: ["extensions", "networking.k8s.io"]
+ resources:
+ - ingresses
+ verbs: ["list", "watch"]
+ - apiGroups: ["batch"]
+ resources:
+ - jobs
+ verbs: ["list", "watch"]
+ - apiGroups: ["coordination.k8s.io"]
+ resources:
+ - leases
+ verbs: ["list", "watch"]
+ - apiGroups: [""]
+ resources:
+ - limitranges
+ verbs: ["list", "watch"]
+ - apiGroups: ["admissionregistration.k8s.io"]
+ resources:
+ - mutatingwebhookconfigurations
+ verbs: ["list", "watch"]
+ - apiGroups: [""]
+ resources:
+ - namespaces
+ verbs: ["list", "watch"]
+ - apiGroups: ["networking.k8s.io"]
+ resources:
+ - networkpolicies
+ verbs: ["list", "watch"]
+ - apiGroups: [""]
+ resources:
+ - nodes
+ verbs: ["list", "watch"]
+ - apiGroups: [""]
+ resources:
+ - persistentvolumeclaims
+ verbs: ["list", "watch"]
+ - apiGroups: [""]
+ resources:
+ - persistentvolumes
+ verbs: ["list", "watch"]
+ - apiGroups: ["policy"]
+ resources:
+ - poddisruptionbudgets
+ verbs: ["list", "watch"]
+ - apiGroups: [""]
+ resources:
+ - pods
+ verbs: ["list", "watch"]
+ - apiGroups: ["extensions", "apps"]
+ resources:
+ - replicasets
+ verbs: ["list", "watch"]
+ - apiGroups: [""]
+ resources:
+ - replicationcontrollers
+ verbs: ["list", "watch"]
+ - apiGroups: [""]
+ resources:
+ - resourcequotas
+ verbs: ["list", "watch"]
+ - apiGroups: [""]
+ resources:
+ - secrets
+ verbs: ["list", "watch"]
+ - apiGroups: [""]
+ resources:
+ - services
+ verbs: ["list", "watch"]
+ - apiGroups: ["apps"]
+ resources:
+ - statefulsets
+ verbs: ["list", "watch"]
+ - apiGroups: ["storage.k8s.io"]
+ resources:
+ - storageclasses
+ verbs: ["list", "watch"]
+ - apiGroups: ["admissionregistration.k8s.io"]
+ resources:
+ - validatingwebhookconfigurations
+ verbs: ["list", "watch"]
+ - apiGroups: ["storage.k8s.io"]
+ resources:
+ - volumeattachments
+ verbs: ["list", "watch"]
+---
+# Source: coder-observability/charts/prometheus/templates/clusterrole.yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ labels:
+ app.kubernetes.io/component: server
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/part-of: prometheus
+ name: prometheus
+rules:
+ - apiGroups:
+ - ""
+ resources:
+ - nodes
+ - nodes/proxy
+ - nodes/metrics
+ - services
+ - endpoints
+ - pods
+ - ingresses
+ - configmaps
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - "extensions"
+ - "networking.k8s.io"
+ resources:
+ - ingresses/status
+ - ingresses
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - "discovery.k8s.io"
+ resources:
+ - endpointslices
+ verbs:
+ - get
+ - list
+ - watch
+ - nonResourceURLs:
+ - "/metrics"
+ verbs:
+ - get
+---
+# Source: coder-observability/charts/grafana-agent/templates/rbac.yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ name: grafana-agent
+ labels:
+ app.kubernetes.io/name: grafana-agent
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: ClusterRole
+ name: grafana-agent
+subjects:
+ - kind: ServiceAccount
+ name: grafana-agent
+ namespace: coder-observability
+---
+# Source: coder-observability/charts/grafana/templates/clusterrolebinding.yaml
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+ name: grafana-clusterrolebinding
+ labels:
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ annotations:
+ prometheus.io/scrape: "true"
+subjects:
+ - kind: ServiceAccount
+ name: grafana
+ namespace: coder-observability
+roleRef:
+ kind: ClusterRole
+ name: grafana-clusterrole
+ apiGroup: rbac.authorization.k8s.io
+---
+# Source: coder-observability/charts/loki/templates/backend/clusterrolebinding.yaml
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+ name: loki-clusterrolebinding
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+subjects:
+ - kind: ServiceAccount
+ name: loki
+ namespace: coder-observability
+roleRef:
+ kind: ClusterRole
+ name: loki-clusterrole
+ apiGroup: rbac.authorization.k8s.io
+---
+# Source: coder-observability/charts/prometheus/charts/kube-state-metrics/templates/clusterrolebinding.yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ labels:
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: metrics
+ app.kubernetes.io/part-of: kube-state-metrics
+ app.kubernetes.io/name: kube-state-metrics
+ app.kubernetes.io/instance: coder-observability
+ name: kube-state-metrics
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: ClusterRole
+ name: kube-state-metrics
+subjects:
+ - kind: ServiceAccount
+ name: kube-state-metrics
+ namespace: coder-observability
+---
+# Source: coder-observability/charts/prometheus/templates/clusterrolebinding.yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ labels:
+ app.kubernetes.io/component: server
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/part-of: prometheus
+ name: prometheus
+subjects:
+ - kind: ServiceAccount
+ name: prometheus
+ namespace: coder-observability
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: ClusterRole
+ name: prometheus
+---
+# Source: coder-observability/charts/grafana/templates/role.yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+ name: grafana
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ annotations:
+ prometheus.io/scrape: "true"
+rules: []
+---
+# Source: coder-observability/charts/grafana/templates/rolebinding.yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+ name: grafana
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ annotations:
+ prometheus.io/scrape: "true"
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: Role
+ name: grafana
+subjects:
+ - kind: ServiceAccount
+ name: grafana
+ namespace: coder-observability
+---
+# Source: coder-observability/charts/grafana-agent/templates/service.yaml
+apiVersion: v1
+kind: Service
+metadata:
+ name: grafana-agent
+ labels:
+ app.kubernetes.io/name: grafana-agent
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+spec:
+ type: ClusterIP
+ selector:
+ app.kubernetes.io/name: grafana-agent
+ app.kubernetes.io/instance: coder-observability
+ internalTrafficPolicy: Cluster
+ ports:
+ - name: http-metrics
+ port: 80
+ targetPort: 80
+ protocol: "TCP"
+---
+# Source: coder-observability/charts/grafana/templates/service.yaml
+apiVersion: v1
+kind: Service
+metadata:
+ name: grafana
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+spec:
+ type: ClusterIP
+ ports:
+ - name: service
+ port: 80
+ protocol: TCP
+ targetPort: 3000
+ selector:
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/instance: coder-observability
+---
+# Source: coder-observability/charts/loki/charts/minio/templates/console-service.yaml
+apiVersion: v1
+kind: Service
+metadata:
+ name: loki-storage-console
+ namespace: "coder-observability"
+ labels:
+ app: minio
+ chart: minio-4.0.15
+ release: coder-observability
+ heritage: Helm
+spec:
+ type: ClusterIP
+ ports:
+ - name: http
+ port: 9001
+ protocol: TCP
+ targetPort: 9001
+ selector:
+ app: minio
+ release: coder-observability
+---
+# Source: coder-observability/charts/loki/charts/minio/templates/service.yaml
+apiVersion: v1
+kind: Service
+metadata:
+ name: loki-storage
+ namespace: "coder-observability"
+ labels:
+ app: minio
+ chart: minio-4.0.15
+ release: coder-observability
+ heritage: Helm
+ monitoring: "true"
+spec:
+ type: ClusterIP
+ ports:
+ - name: http
+ port: 9000
+ protocol: TCP
+ targetPort: 9000
+ selector:
+ app: minio
+ release: coder-observability
+---
+# Source: coder-observability/charts/loki/charts/minio/templates/statefulset.yaml
+apiVersion: v1
+kind: Service
+metadata:
+ name: loki-storage-svc
+ namespace: "coder-observability"
+ labels:
+ app: minio
+ chart: minio-4.0.15
+ release: "coder-observability"
+ heritage: "Helm"
+spec:
+ publishNotReadyAddresses: true
+ clusterIP: None
+ ports:
+ - name: http
+ port: 9000
+ protocol: TCP
+ targetPort: 9000
+ selector:
+ app: minio
+ release: coder-observability
+---
+# Source: coder-observability/charts/loki/templates/backend/query-scheduler-discovery.yaml
+apiVersion: v1
+kind: Service
+metadata:
+ name: loki-query-scheduler-discovery
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: backend
+ prometheus.io/service-monitor: "false"
+spec:
+ type: ClusterIP
+ clusterIP: None
+ publishNotReadyAddresses: true
+ ports:
+ - name: http-metrics
+ port: 3100
+ targetPort: http-metrics
+ protocol: TCP
+ - name: grpc
+ port: 9095
+ targetPort: grpc
+ protocol: TCP
+ selector:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: backend
+---
+# Source: coder-observability/charts/loki/templates/backend/service-backend-headless.yaml
+apiVersion: v1
+kind: Service
+metadata:
+ name: loki-backend-headless
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: backend
+ variant: headless
+ prometheus.io/service-monitor: "false"
+ annotations:
+spec:
+ type: ClusterIP
+ clusterIP: None
+ ports:
+ - name: http-metrics
+ port: 3100
+ targetPort: http-metrics
+ protocol: TCP
+ - name: grpc
+ port: 9095
+ targetPort: grpc
+ protocol: TCP
+ selector:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: backend
+---
+# Source: coder-observability/charts/loki/templates/backend/service-backend.yaml
+apiVersion: v1
+kind: Service
+metadata:
+ name: loki-backend
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: backend
+ annotations:
+spec:
+ type: ClusterIP
+ ports:
+ - name: http-metrics
+ port: 3100
+ targetPort: http-metrics
+ protocol: TCP
+ - name: grpc
+ port: 9095
+ targetPort: grpc
+ protocol: TCP
+ selector:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: backend
+---
+# Source: coder-observability/charts/loki/templates/chunks-cache/service-chunks-cache-headless.yaml
+apiVersion: v1
+kind: Service
+metadata:
+ name: loki-chunks-cache
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: "memcached-chunks-cache"
+ annotations: {}
+ namespace: "coder-observability"
+spec:
+ type: ClusterIP
+ clusterIP: None
+ ports:
+ - name: memcached-client
+ port: 11211
+ targetPort: 11211
+ - name: http-metrics
+ port: 9150
+ targetPort: 9150
+ selector:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: "memcached-chunks-cache"
+---
+# Source: coder-observability/charts/loki/templates/gateway/service-gateway.yaml
+apiVersion: v1
+kind: Service
+metadata:
+ name: loki-gateway
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: gateway
+ annotations:
+spec:
+ type: ClusterIP
+ ports:
+ - name: http-metrics
+ port: 80
+ targetPort: http-metrics
+ protocol: TCP
+ selector:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: gateway
+---
+# Source: coder-observability/charts/loki/templates/loki-canary/service.yaml
+apiVersion: v1
+kind: Service
+metadata:
+ name: loki-canary
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: canary
+ annotations:
+spec:
+ type: ClusterIP
+ ports:
+ - name: http-metrics
+ port: 3500
+ targetPort: http-metrics
+ protocol: TCP
+ selector:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: canary
+---
+# Source: coder-observability/charts/loki/templates/read/service-read-headless.yaml
+apiVersion: v1
+kind: Service
+metadata:
+ name: loki-read-headless
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: read
+ variant: headless
+ prometheus.io/service-monitor: "false"
+ annotations:
+spec:
+ type: ClusterIP
+ clusterIP: None
+ ports:
+ - name: http-metrics
+ port: 3100
+ targetPort: http-metrics
+ protocol: TCP
+ - name: grpc
+ port: 9095
+ targetPort: grpc
+ protocol: TCP
+ appProtocol: tcp
+ selector:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: read
+---
+# Source: coder-observability/charts/loki/templates/read/service-read.yaml
+apiVersion: v1
+kind: Service
+metadata:
+ name: loki-read
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: read
+ annotations:
+spec:
+ type: ClusterIP
+ ports:
+ - name: http-metrics
+ port: 3100
+ targetPort: http-metrics
+ protocol: TCP
+ - name: grpc
+ port: 9095
+ targetPort: grpc
+ protocol: TCP
+ selector:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: read
+---
+# Source: coder-observability/charts/loki/templates/results-cache/service-results-cache-headless.yaml
+# Headless Service (clusterIP: None) for the memcached results cache.
+# Selects pods with component "memcached-results-cache".
+# Exposes memcached-client (11211) and http-metrics (9150) using numeric targetPorts.
+apiVersion: v1
+kind: Service
+metadata:
+ name: loki-results-cache
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: "memcached-results-cache"
+ annotations: {}
+ namespace: "coder-observability"
+spec:
+ type: ClusterIP
+ clusterIP: None
+ ports:
+ - name: memcached-client
+ port: 11211
+ targetPort: 11211
+ - name: http-metrics
+ port: 9150
+ targetPort: 9150
+ selector:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: "memcached-results-cache"
+---
+# Source: coder-observability/charts/loki/templates/service-memberlist.yaml
+# Headless Service (clusterIP: None) named loki-memberlist.
+# Unlike the per-component Services, the selector matches on
+# app.kubernetes.io/part-of: memberlist rather than on a component label.
+# Port "tcp" (7946) targets the container port named http-memberlist.
+apiVersion: v1
+kind: Service
+metadata:
+ name: loki-memberlist
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+spec:
+ type: ClusterIP
+ clusterIP: None
+ ports:
+ - name: tcp
+ port: 7946
+ targetPort: http-memberlist
+ protocol: TCP
+ selector:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/part-of: memberlist
+---
+# Source: coder-observability/charts/loki/templates/write/service-write-headless.yaml
+# Headless Service (clusterIP: None) for the Loki "write" component.
+# Selects pods labelled name=loki, instance=coder-observability, component=write.
+# Exposes http-metrics (3100) and grpc (9095); only the grpc port sets appProtocol.
+# Carries the label prometheus.io/service-monitor: "false".
+apiVersion: v1
+kind: Service
+metadata:
+ name: loki-write-headless
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: write
+ variant: headless
+ prometheus.io/service-monitor: "false"
+ annotations:
+spec:
+ type: ClusterIP
+ clusterIP: None
+ ports:
+ - name: http-metrics
+ port: 3100
+ targetPort: http-metrics
+ protocol: TCP
+ - name: grpc
+ port: 9095
+ targetPort: grpc
+ protocol: TCP
+ appProtocol: tcp
+ selector:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: write
+---
+# Source: coder-observability/charts/loki/templates/write/service-write.yaml
+# ClusterIP Service for the Loki "write" component.
+# Selects pods labelled name=loki, instance=coder-observability, component=write.
+# Exposes http-metrics (3100) and grpc (9095), each targeting the named container port.
+apiVersion: v1
+kind: Service
+metadata:
+ name: loki-write
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: write
+ annotations:
+spec:
+ type: ClusterIP
+ ports:
+ - name: http-metrics
+ port: 3100
+ targetPort: http-metrics
+ protocol: TCP
+ - name: grpc
+ port: 9095
+ targetPort: grpc
+ protocol: TCP
+ selector:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: write
+---
+# Source: coder-observability/charts/prometheus/charts/alertmanager/templates/services.yaml
+# ClusterIP Service for Alertmanager.
+# Service port 80 ("http") forwards to the container port named "http" on pods
+# labelled name=alertmanager, instance=coder-observability.
+apiVersion: v1
+kind: Service
+metadata:
+ name: alertmanager
+ labels:
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ namespace: coder-observability
+spec:
+ type: ClusterIP
+ ports:
+ - port: 80
+ targetPort: http
+ protocol: TCP
+ name: http
+ selector:
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/instance: coder-observability
+---
+# Source: coder-observability/charts/prometheus/charts/alertmanager/templates/services.yaml
+# Headless counterpart (clusterIP: None) of the alertmanager Service.
+# Same selector and the same single port 80 ("http") -> container port "http".
+apiVersion: v1
+kind: Service
+metadata:
+ name: alertmanager-headless
+ labels:
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ namespace: coder-observability
+spec:
+ clusterIP: None
+ ports:
+ - port: 80
+ targetPort: http
+ protocol: TCP
+ name: http
+ selector:
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/instance: coder-observability
+---
+# Source: coder-observability/charts/prometheus/charts/kube-state-metrics/templates/service.yaml
+# ClusterIP Service for kube-state-metrics.
+# Exposes port 8080 ("http") -> targetPort 8080 and is annotated
+# prometheus.io/scrape: 'true'.
+apiVersion: v1
+kind: Service
+metadata:
+ name: kube-state-metrics
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: metrics
+ app.kubernetes.io/part-of: kube-state-metrics
+ app.kubernetes.io/name: kube-state-metrics
+ app.kubernetes.io/instance: coder-observability
+ annotations:
+ prometheus.io/scrape: 'true'
+spec:
+ type: "ClusterIP"
+ ports:
+ - name: "http"
+ protocol: TCP
+ port: 8080
+ targetPort: 8080
+ selector:
+ app.kubernetes.io/name: kube-state-metrics
+ app.kubernetes.io/instance: coder-observability
+---
+# Source: coder-observability/charts/prometheus/charts/prometheus-node-exporter/templates/service.yaml
+# ClusterIP Service for node-exporter.
+# Exposes port 9100 ("metrics") -> targetPort 9100 on pods labelled
+# name=prometheus-node-exporter, and is annotated prometheus.io/scrape: "true".
+apiVersion: v1
+kind: Service
+metadata:
+ name: node-exporter
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: metrics
+ app.kubernetes.io/part-of: prometheus-node-exporter
+ app.kubernetes.io/name: prometheus-node-exporter
+ app.kubernetes.io/instance: coder-observability
+ annotations:
+ prometheus.io/scrape: "true"
+spec:
+ type: ClusterIP
+ ports:
+ - port: 9100
+ targetPort: 9100
+ protocol: TCP
+ name: metrics
+ selector:
+ app.kubernetes.io/name: prometheus-node-exporter
+ app.kubernetes.io/instance: coder-observability
+---
+# Source: coder-observability/charts/prometheus/templates/headless-svc.yaml
+# Headless Service (clusterIP: None) for the Prometheus server.
+# Service port 80 ("http") forwards to container port 9090 on pods labelled
+# component=server, name=prometheus, instance=coder-observability.
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ app.kubernetes.io/component: server
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/part-of: prometheus
+ name: prometheus-headless
+ namespace: coder-observability
+spec:
+ clusterIP: None
+ ports:
+ - name: http
+ port: 80
+ protocol: TCP
+ targetPort: 9090
+ selector:
+ app.kubernetes.io/component: server
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/instance: coder-observability
+---
+# Source: coder-observability/charts/prometheus/templates/service.yaml
+# ClusterIP Service for the Prometheus server.
+# Service port 80 ("http") forwards to container port 9090; sessionAffinity is None.
+# Uses the same selector as prometheus-headless.
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ app.kubernetes.io/component: server
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/part-of: prometheus
+ name: prometheus
+ namespace: coder-observability
+spec:
+ ports:
+ - name: http
+ port: 80
+ protocol: TCP
+ targetPort: 9090
+ selector:
+ app.kubernetes.io/component: server
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/instance: coder-observability
+ sessionAffinity: None
+ type: "ClusterIP"
+---
+# Source: coder-observability/templates/service-runbook-viewer.yaml
+# Service for the runbook viewer: port 80 -> container port 3000 on pods
+# labelled app=runbook-viewer. No spec.type is set.
+# NOTE(review): unlike most sibling Services in this file, this one sets no
+# metadata.namespace or labels and its port is unnamed — presumably it relies on
+# the release namespace at install time; confirm this is intended.
+apiVersion: v1
+kind: Service
+metadata:
+ name: runbook-viewer
+spec:
+ ports:
+ - port: 80
+ targetPort: 3000
+ protocol: TCP
+ selector:
+ app: runbook-viewer
+---
+# Source: coder-observability/charts/grafana-agent/templates/controllers/daemonset.yaml
+# DaemonSet running grafana-agent with a config-reloader sidecar.
+# - grafana-agent: runs /etc/agent/config.river with AGENT_MODE=flow, listens on
+#   0.0.0.0:80, and gets HOSTNAME from the pod's spec.nodeName.
+# - config-reloader: is passed --volume-dir=/etc/agent and a --webhook-url that
+#   points at the agent's /-/reload endpoint on localhost:80.
+# Volumes: ConfigMap collector-config at /etc/agent, plus hostPath /var/log and
+# /var/lib/docker/containers mounted read-only into the agent container.
+# NOTE(review): no metadata.namespace is set and neither container has an image:
+# field in this rendering — presumably stripped or defaulted elsewhere; confirm.
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+ name: grafana-agent
+ labels:
+ app.kubernetes.io/name: grafana-agent
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+spec:
+ minReadySeconds: 10
+ selector:
+ matchLabels:
+ app.kubernetes.io/name: grafana-agent
+ app.kubernetes.io/instance: coder-observability
+ template:
+ metadata:
+ annotations:
+ kubectl.kubernetes.io/default-container: grafana-agent
+ prometheus.io/scrape: "true"
+ labels:
+ app.kubernetes.io/name: grafana-agent
+ app.kubernetes.io/instance: coder-observability
+ spec:
+ serviceAccountName: grafana-agent
+ containers:
+ - name: grafana-agent
+ imagePullPolicy: IfNotPresent
+ args:
+ - run
+ - /etc/agent/config.river
+ - --storage.path=/tmp/agent
+ - --server.http.listen-addr=0.0.0.0:80
+ - --server.http.ui-path-prefix=/
+ - --disable-reporting=true
+ env:
+ - name: AGENT_MODE
+ value: flow
+ - name: AGENT_DEPLOY_MODE
+ value: "helm"
+ - name: HOSTNAME
+ valueFrom:
+ fieldRef:
+ fieldPath: spec.nodeName
+ ports:
+ - containerPort: 80
+ name: http-metrics
+ readinessProbe:
+ httpGet:
+ path: /-/ready
+ port: 80
+ scheme: HTTP
+ initialDelaySeconds: 10
+ timeoutSeconds: 1
+ volumeMounts:
+ - name: config
+ mountPath: /etc/agent
+ - name: varlog
+ mountPath: /var/log
+ readOnly: true
+ - name: dockercontainers
+ mountPath: /var/lib/docker/containers
+ readOnly: true
+ - name: config-reloader
+ args:
+ - --volume-dir=/etc/agent
+ - --webhook-url=http://localhost:80/-/reload
+ volumeMounts:
+ - name: config
+ mountPath: /etc/agent
+ resources:
+ requests:
+ cpu: 1m
+ memory: 5Mi
+ dnsPolicy: ClusterFirst
+ volumes:
+ - name: config
+ configMap:
+ name: collector-config
+ - name: varlog
+ hostPath:
+ path: /var/log
+ - name: dockercontainers
+ hostPath:
+ path: /var/lib/docker/containers
+---
+# Source: coder-observability/charts/loki/templates/loki-canary/daemonset.yaml
+# DaemonSet running loki-canary (rolling update, maxUnavailable: 1).
+# The canary is pointed at loki-gateway.coder-observability.svc.cluster.local.:80,
+# uses label "pod" with value $(POD_NAME) (from metadata.name), and has -push=true.
+# Runs as uid/gid 10001, non-root, read-only root filesystem, all capabilities dropped.
+# Exposes http-metrics on 3500; readiness is an HTTP GET of /metrics on that port.
+# NOTE(review): volumeMounts: and volumes: are rendered with no value (null).
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+ name: loki-canary
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: canary
+spec:
+ selector:
+ matchLabels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: canary
+ updateStrategy:
+ rollingUpdate:
+ maxUnavailable: 1
+ type: RollingUpdate
+ template:
+ metadata:
+ annotations:
+ prometheus.io/scrape: "true"
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: canary
+ spec:
+ serviceAccountName: loki-canary
+ securityContext:
+ fsGroup: 10001
+ runAsGroup: 10001
+ runAsNonRoot: true
+ runAsUser: 10001
+ containers:
+ - name: loki-canary
+ imagePullPolicy: IfNotPresent
+ args:
+ - -addr=loki-gateway.coder-observability.svc.cluster.local.:80
+ - -labelname=pod
+ - -labelvalue=$(POD_NAME)
+ - -push=true
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
+ volumeMounts:
+ ports:
+ - name: http-metrics
+ containerPort: 3500
+ protocol: TCP
+ env:
+ - name: POD_NAME
+ valueFrom:
+ fieldRef:
+ fieldPath: metadata.name
+ readinessProbe:
+ httpGet:
+ path: /metrics
+ port: http-metrics
+ initialDelaySeconds: 15
+ timeoutSeconds: 1
+ volumes:
+---
+# Source: coder-observability/charts/prometheus/charts/prometheus-node-exporter/templates/daemonset.yaml
+# DaemonSet running prometheus-node-exporter (rolling update, maxUnavailable: 1).
+# The pod uses hostNetwork and hostPID and mounts the host's /proc, /sys and /
+# read-only at /host/proc, /host/sys and /host/root; the --path.* args point there.
+# Listens on [$(HOST_IP)]:9100 with HOST_IP set to 0.0.0.0; liveness and readiness
+# are HTTP GETs of / on port 9100.
+# Runs as uid/gid 65534 with automountServiceAccountToken: false, is restricted to
+# linux nodes, and tolerates every NoSchedule taint (operator: Exists).
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+ name: node-exporter
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: metrics
+ app.kubernetes.io/part-of: prometheus-node-exporter
+ app.kubernetes.io/name: prometheus-node-exporter
+ app.kubernetes.io/instance: coder-observability
+spec:
+ selector:
+ matchLabels:
+ app.kubernetes.io/name: prometheus-node-exporter
+ app.kubernetes.io/instance: coder-observability
+ revisionHistoryLimit: 10
+ updateStrategy:
+ rollingUpdate:
+ maxUnavailable: 1
+ type: RollingUpdate
+ template:
+ metadata:
+ annotations:
+ cluster-autoscaler.kubernetes.io/safe-to-evict: "true"
+ prometheus.io/scrape: "true"
+ labels:
+ helm.sh/chart: prometheus-node-exporter-4.37.0
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: metrics
+ app.kubernetes.io/part-of: prometheus-node-exporter
+ app.kubernetes.io/name: prometheus-node-exporter
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/version: "1.8.1"
+ spec:
+ automountServiceAccountToken: false
+ securityContext:
+ fsGroup: 65534
+ runAsGroup: 65534
+ runAsNonRoot: true
+ runAsUser: 65534
+ serviceAccountName: node-exporter
+ containers:
+ - name: node-exporter
+ imagePullPolicy: IfNotPresent
+ args:
+ - --path.procfs=/host/proc
+ - --path.sysfs=/host/sys
+ - --path.rootfs=/host/root
+ - --path.udev.data=/host/root/run/udev/data
+ - --web.listen-address=[$(HOST_IP)]:9100
+ securityContext:
+ allowPrivilegeEscalation: false
+ readOnlyRootFilesystem: true
+ env:
+ - name: HOST_IP
+ value: 0.0.0.0
+ ports:
+ - name: metrics
+ containerPort: 9100
+ protocol: TCP
+ livenessProbe:
+ failureThreshold: 3
+ httpGet:
+ httpHeaders:
+ path: /
+ port: 9100
+ scheme: HTTP
+ initialDelaySeconds: 0
+ periodSeconds: 10
+ successThreshold: 1
+ timeoutSeconds: 1
+ readinessProbe:
+ failureThreshold: 3
+ httpGet:
+ httpHeaders:
+ path: /
+ port: 9100
+ scheme: HTTP
+ initialDelaySeconds: 0
+ periodSeconds: 10
+ successThreshold: 1
+ timeoutSeconds: 1
+ volumeMounts:
+ - name: proc
+ mountPath: /host/proc
+ readOnly: true
+ - name: sys
+ mountPath: /host/sys
+ readOnly: true
+ - name: root
+ mountPath: /host/root
+ mountPropagation: HostToContainer
+ readOnly: true
+ hostNetwork: true
+ hostPID: true
+ nodeSelector:
+ kubernetes.io/os: linux
+ tolerations:
+ - effect: NoSchedule
+ operator: Exists
+ volumes:
+ - name: proc
+ hostPath:
+ path: /proc
+ - name: sys
+ hostPath:
+ path: /sys
+ - name: root
+ hostPath:
+ path: /
+---
+# Source: coder-observability/charts/loki/templates/gateway/deployment-gateway-nginx.yaml
+# Deployment (1 replica, RollingUpdate) of the Loki gateway, a container named nginx.
+# Serves on container port 8080 (named http-metrics); readiness is an HTTP GET of /.
+# ConfigMap loki-gateway is mounted at /etc/nginx; /tmp and /docker-entrypoint.d are
+# emptyDir volumes, and the root filesystem is read-only.
+# Runs as uid/gid 101, non-root, all capabilities dropped.
+# A required podAntiAffinity keeps pods with component=gateway on separate hostnames.
+# The checksum/config annotation is a hash value — presumably of the rendered config.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: loki-gateway
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: gateway
+spec:
+ replicas: 1
+ strategy:
+ type: RollingUpdate
+ revisionHistoryLimit: 10
+ selector:
+ matchLabels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: gateway
+ template:
+ metadata:
+ annotations:
+ checksum/config: bc7add19cdc0df1566dec1bf8f9421082357d4393124d6ea2df28d7e5888cc8a
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: gateway
+ spec:
+ serviceAccountName: loki
+ enableServiceLinks: true
+ securityContext:
+ fsGroup: 101
+ runAsGroup: 101
+ runAsNonRoot: true
+ runAsUser: 101
+ terminationGracePeriodSeconds: 30
+ containers:
+ - name: nginx
+ imagePullPolicy: IfNotPresent
+ ports:
+ - name: http-metrics
+ containerPort: 8080
+ protocol: TCP
+ readinessProbe:
+ httpGet:
+ path: /
+ port: http-metrics
+ initialDelaySeconds: 15
+ timeoutSeconds: 1
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
+ volumeMounts:
+ - name: config
+ mountPath: /etc/nginx
+ - name: tmp
+ mountPath: /tmp
+ - name: docker-entrypoint-d-override
+ mountPath: /docker-entrypoint.d
+ resources: {}
+ affinity:
+ podAntiAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ - labelSelector:
+ matchLabels:
+ app.kubernetes.io/component: gateway
+ topologyKey: kubernetes.io/hostname
+ volumes:
+ - name: config
+ configMap:
+ name: loki-gateway
+ - name: tmp
+ emptyDir: {}
+ - name: docker-entrypoint-d-override
+ emptyDir: {}
+---
+# Source: coder-observability/charts/loki/templates/read/deployment-read.yaml
+# Deployment (1 replica) of Loki started with -target=read.
+# Rolling update uses maxSurge: 0 / maxUnavailable: 1.
+# Pods carry app.kubernetes.io/part-of: memberlist, the label the loki-memberlist
+# Service selects on.
+# The compactor gRPC address is set to loki-backend...svc.cluster.local:9095.
+# Ports: http-metrics 3100, grpc 9095, http-memberlist 7946; readiness is GET /ready.
+# Config comes from ConfigMaps loki (key config.yaml) and loki-runtime; /tmp and
+# /var/loki are emptyDir volumes.
+# A required podAntiAffinity keeps pods with component=read on separate hostnames.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: loki-read
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/part-of: memberlist
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: read
+spec:
+ replicas: 1
+ strategy:
+ rollingUpdate:
+ maxSurge: 0
+ maxUnavailable: 1
+ revisionHistoryLimit: 10
+ selector:
+ matchLabels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: read
+ template:
+ metadata:
+ annotations:
+ checksum/config: 4dbe50185304244ab527314b7723d048ea4544f97d0a4d8e0192863861811005
+ prometheus.io/scrape: "true"
+ labels:
+ app.kubernetes.io/part-of: memberlist
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: read
+ spec:
+ serviceAccountName: loki
+ automountServiceAccountToken: true
+ securityContext:
+ fsGroup: 10001
+ runAsGroup: 10001
+ runAsNonRoot: true
+ runAsUser: 10001
+ terminationGracePeriodSeconds: 30
+ containers:
+ - name: loki
+ imagePullPolicy: IfNotPresent
+ args:
+ - -config.file=/etc/loki/config/config.yaml
+ - -target=read
+ - -legacy-read-mode=false
+ - -common.compactor-grpc-address=loki-backend.coder-observability.svc.cluster.local:9095
+ ports:
+ - name: http-metrics
+ containerPort: 3100
+ protocol: TCP
+ - name: grpc
+ containerPort: 9095
+ protocol: TCP
+ - name: http-memberlist
+ containerPort: 7946
+ protocol: TCP
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
+ readinessProbe:
+ httpGet:
+ path: /ready
+ port: http-metrics
+ initialDelaySeconds: 30
+ timeoutSeconds: 1
+ volumeMounts:
+ - name: config
+ mountPath: /etc/loki/config
+ - name: runtime-config
+ mountPath: /etc/loki/runtime-config
+ - name: tmp
+ mountPath: /tmp
+ - name: data
+ mountPath: /var/loki
+ resources: {}
+ affinity:
+ podAntiAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ - labelSelector:
+ matchLabels:
+ app.kubernetes.io/component: read
+ topologyKey: kubernetes.io/hostname
+ volumes:
+ - name: tmp
+ emptyDir: {}
+ - name: data
+ emptyDir: {}
+ - name: config
+ configMap:
+ name: loki
+ items:
+ - key: "config.yaml"
+ path: "config.yaml"
+ - name: runtime-config
+ configMap:
+ name: loki-runtime
+---
+# Source: coder-observability/charts/prometheus/charts/kube-state-metrics/templates/deployment.yaml
+# Deployment (1 replica, RollingUpdate) of kube-state-metrics.
+# Started with --port=8080 and an explicit --resources list naming the resource
+# kinds passed to the binary.
+# Liveness is GET /healthz and readiness is GET /, both on port 8080.
+# Runs as uid/gid 65534, non-root, RuntimeDefault seccomp, read-only root
+# filesystem, all capabilities dropped; the service account token is mounted.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: kube-state-metrics
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: metrics
+ app.kubernetes.io/part-of: kube-state-metrics
+ app.kubernetes.io/name: kube-state-metrics
+ app.kubernetes.io/instance: coder-observability
+spec:
+ selector:
+ matchLabels:
+ app.kubernetes.io/name: kube-state-metrics
+ app.kubernetes.io/instance: coder-observability
+ replicas: 1
+ strategy:
+ type: RollingUpdate
+ revisionHistoryLimit: 10
+ template:
+ metadata:
+ labels:
+ helm.sh/chart: kube-state-metrics-5.21.0
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: metrics
+ app.kubernetes.io/part-of: kube-state-metrics
+ app.kubernetes.io/name: kube-state-metrics
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/version: "2.12.0"
+ annotations:
+ prometheus.io/scrape: "true"
+ spec:
+ automountServiceAccountToken: true
+ hostNetwork: false
+ serviceAccountName: kube-state-metrics
+ securityContext:
+ fsGroup: 65534
+ runAsGroup: 65534
+ runAsNonRoot: true
+ runAsUser: 65534
+ seccompProfile:
+ type: RuntimeDefault
+ containers:
+ - name: kube-state-metrics
+ args:
+ - --port=8080
+ - --resources=certificatesigningrequests,configmaps,cronjobs,daemonsets,deployments,endpoints,horizontalpodautoscalers,ingresses,jobs,leases,limitranges,mutatingwebhookconfigurations,namespaces,networkpolicies,nodes,persistentvolumeclaims,persistentvolumes,poddisruptionbudgets,pods,replicasets,replicationcontrollers,resourcequotas,secrets,services,statefulsets,storageclasses,validatingwebhookconfigurations,volumeattachments
+ imagePullPolicy: IfNotPresent
+ ports:
+ - containerPort: 8080
+ name: "http"
+ livenessProbe:
+ failureThreshold: 3
+ httpGet:
+ httpHeaders:
+ path: /healthz
+ port: 8080
+ scheme: HTTP
+ initialDelaySeconds: 5
+ periodSeconds: 10
+ successThreshold: 1
+ timeoutSeconds: 5
+ readinessProbe:
+ failureThreshold: 3
+ httpGet:
+ httpHeaders:
+ path: /
+ port: 8080
+ scheme: HTTP
+ initialDelaySeconds: 5
+ periodSeconds: 10
+ successThreshold: 1
+ timeoutSeconds: 5
+ resources: {}
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
+---
+# Source: coder-observability/charts/grafana/templates/statefulset.yaml
+# StatefulSet (1 replica, serviceName grafana-headless) running Grafana.
+# Init containers:
+# - init-chown-data: busybox 1.31.1 as uid 0 with the CHOWN capability; runs
+#   `chown -R 472:472 /var/lib/grafana` on the storage volume.
+# - download-dashboards: curl image; creates /var/lib/grafana/dashboards/default and
+#   runs /etc/grafana/download_dashboards.sh, mounted from the grafana ConfigMap.
+# Main container "grafana":
+# - mounts grafana.ini and the provisioning files (datasources.yaml, coder.yaml,
+#   infra.yaml, sidecar.yaml) as subPaths of the "config" volume;
+# - mounts six dashboards-* ConfigMaps at /var/lib/grafana/dashboards/coder/0..5;
+# - exposes 3000 (grafana) and 9094 over TCP and UDP (gossip-tcp / gossip-udp);
+# - sets GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION to "true";
+# - liveness and readiness are HTTP GETs of /api/health on port 3000.
+# Storage: volumeClaimTemplate "storage" requesting 10Gi, ReadWriteOnce.
+# NOTE(review): volume dashboards-infra (ConfigMap grafana-dashboards-infra) is
+# declared but no volumeMount in this document references it — confirm intended.
+# NOTE(review): "storage" is declared both as a pod volume (PVC claimName grafana)
+# and as a volumeClaimTemplate; verify which one is meant to apply.
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+ name: grafana
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ annotations:
+ prometheus.io/scrape: "true"
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/instance: coder-observability
+ serviceName: grafana-headless
+ template:
+ metadata:
+ labels:
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/instance: coder-observability
+ annotations:
+ checksum/config: 2828a490315379b00f2116ebe6a20dd3ca9a4d5ce5839f037c1eb0a4501ecb18
+ checksum/dashboards-json-config: 010b57348b6dd1f09007330c03d22a0570022534712646511cad39a9e3cb4bb7
+ checksum/sc-dashboard-provider-config: 01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b
+ kubectl.kubernetes.io/default-container: grafana
+ spec:
+ serviceAccountName: grafana
+ automountServiceAccountToken: true
+ securityContext:
+ fsGroup: 472
+ runAsGroup: 472
+ runAsNonRoot: true
+ runAsUser: 472
+ initContainers:
+ - name: init-chown-data
+ image: "docker.io/library/busybox:1.31.1"
+ imagePullPolicy: IfNotPresent
+ securityContext:
+ capabilities:
+ add:
+ - CHOWN
+ runAsNonRoot: false
+ runAsUser: 0
+ seccompProfile:
+ type: RuntimeDefault
+ command:
+ - chown
+ - -R
+ - 472:472
+ - /var/lib/grafana
+ volumeMounts:
+ - name: storage
+ mountPath: "/var/lib/grafana"
+ - name: download-dashboards
+ image: "docker.io/curlimages/curl:7.85.0"
+ imagePullPolicy: IfNotPresent
+ command: ["/bin/sh"]
+ args: ["-c", "mkdir -p /var/lib/grafana/dashboards/default && /bin/sh -x /etc/grafana/download_dashboards.sh"]
+ env:
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ seccompProfile:
+ type: RuntimeDefault
+ volumeMounts:
+ - name: config
+ mountPath: "/etc/grafana/download_dashboards.sh"
+ subPath: download_dashboards.sh
+ - name: storage
+ mountPath: "/var/lib/grafana"
+ enableServiceLinks: true
+ containers:
+ - name: grafana
+ imagePullPolicy: IfNotPresent
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ seccompProfile:
+ type: RuntimeDefault
+ volumeMounts:
+ - name: config
+ mountPath: "/etc/grafana/grafana.ini"
+ subPath: grafana.ini
+ - name: dashboards-status
+ mountPath: /var/lib/grafana/dashboards/coder/0
+ subPath:
+ readOnly: false
+ - name: dashboards-coderd
+ mountPath: /var/lib/grafana/dashboards/coder/1
+ subPath:
+ readOnly: false
+ - name: dashboards-provisionerd
+ mountPath: /var/lib/grafana/dashboards/coder/2
+ subPath:
+ readOnly: false
+ - name: dashboards-workspaces
+ mountPath: /var/lib/grafana/dashboards/coder/3
+ subPath:
+ readOnly: false
+ - name: dashboards-workspace-detail
+ mountPath: /var/lib/grafana/dashboards/coder/4
+ subPath:
+ readOnly: false
+ - name: dashboards-prebuilds
+ mountPath: /var/lib/grafana/dashboards/coder/5
+ subPath:
+ readOnly: false
+ - name: storage
+ mountPath: "/var/lib/grafana"
+ - name: config
+ mountPath: "/etc/grafana/provisioning/datasources/datasources.yaml"
+ subPath: "datasources.yaml"
+ - name: config
+ mountPath: "/etc/grafana/provisioning/dashboards/coder.yaml"
+ subPath: "coder.yaml"
+ - name: config
+ mountPath: "/etc/grafana/provisioning/dashboards/infra.yaml"
+ subPath: "infra.yaml"
+ - name: config
+ mountPath: "/etc/grafana/provisioning/dashboards/sidecar.yaml"
+ subPath: "sidecar.yaml"
+ ports:
+ - name: grafana
+ containerPort: 3000
+ protocol: TCP
+ - name: gossip-tcp
+ containerPort: 9094
+ protocol: TCP
+ - name: gossip-udp
+ containerPort: 9094
+ protocol: UDP
+ env:
+ - name: POD_IP
+ valueFrom:
+ fieldRef:
+ fieldPath: status.podIP
+ - name: GF_PATHS_DATA
+ value: /var/lib/grafana/
+ - name: GF_PATHS_LOGS
+ value: /var/log/grafana
+ - name: GF_PATHS_PLUGINS
+ value: /var/lib/grafana/plugins
+ - name: GF_PATHS_PROVISIONING
+ value: /etc/grafana/provisioning
+ - name: "GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION"
+ value: "true"
+ livenessProbe:
+ failureThreshold: 10
+ httpGet:
+ path: /api/health
+ port: 3000
+ initialDelaySeconds: 60
+ timeoutSeconds: 30
+ readinessProbe:
+ httpGet:
+ path: /api/health
+ port: 3000
+ volumes:
+ - name: config
+ configMap:
+ name: grafana
+ - name: dashboards-status
+ configMap:
+ name: dashboards-status
+ - name: dashboards-coderd
+ configMap:
+ name: dashboards-coderd
+ - name: dashboards-provisionerd
+ configMap:
+ name: dashboards-provisionerd
+ - name: dashboards-workspaces
+ configMap:
+ name: dashboards-workspaces
+ - name: dashboards-workspace-detail
+ configMap:
+ name: dashboards-workspace-detail
+ - name: dashboards-prebuilds
+ configMap:
+ name: dashboards-prebuilds
+ - name: dashboards-infra
+ configMap:
+ name: grafana-dashboards-infra
+ - name: storage
+ persistentVolumeClaim:
+ claimName: grafana
+ volumeClaimTemplates:
+ - metadata:
+ name: storage
+ spec:
+ accessModes: [ReadWriteOnce]
+ storageClassName:
+ resources:
+ requests:
+ storage: 10Gi
+---
+# Source: coder-observability/charts/loki/charts/minio/templates/statefulset.yaml
+# StatefulSet (1 replica, Parallel pod management) running MinIO as "loki-storage".
+# The command starts `minio server` against loki-storage-{0...0} with drives
+# /export-{0...1}, API on :9000 and console on :9001.
+# Root credentials come from Secret loki-storage (keys rootUser / rootPassword);
+# MINIO_PROMETHEUS_AUTH_TYPE is "public".
+# Two volumeClaimTemplates, export-0 and export-1 (5Gi each, ReadWriteOnce), are
+# mounted at /export-0 and /export-1.
+# Pod annotations set the Prometheus scrape path to /minio/v2/metrics/cluster.
+# NOTE(review): volume minio-user (Secret loki-storage) is declared but no
+# volumeMount in this document references it — confirm intended.
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+ name: loki-storage
+ namespace: "coder-observability"
+ labels:
+ app: minio
+ chart: minio-4.0.15
+ release: coder-observability
+ heritage: Helm
+spec:
+ updateStrategy:
+ type: RollingUpdate
+ podManagementPolicy: "Parallel"
+ serviceName: loki-storage-svc
+ replicas: 1
+ selector:
+ matchLabels:
+ app: minio
+ release: coder-observability
+ template:
+ metadata:
+ name: loki-storage
+ labels:
+ app: minio
+ release: coder-observability
+ app.kubernetes.io/name: loki-storage
+ annotations:
+ checksum/secrets: 982fb92f094edb3a3a156ec880923b557a774f885bf38c7d14d92c08f1d1257d
+ checksum/config: 876a07a82a63058ee3cc32fd1988af1e51b44e1d25825387ad4ecfdde9199417
+ prometheus.io/path: /minio/v2/metrics/cluster
+ prometheus.io/scrape: "true"
+ spec:
+ securityContext:
+ runAsUser: 1000
+ runAsGroup: 1000
+ fsGroup: 1000
+ fsGroupChangePolicy: OnRootMismatch
+ serviceAccountName: minio-sa
+ containers:
+ - name: minio
+ imagePullPolicy: IfNotPresent
+ command: ["/bin/sh", "-ce", "/usr/bin/docker-entrypoint.sh minio server http://loki-storage-{0...0}.loki-storage-svc.coder-observability.svc.cluster.local/export-{0...1} -S /etc/minio/certs/ --address :9000 --console-address :9001"]
+ volumeMounts:
+ - name: export-0
+ mountPath: /export-0
+ - name: export-1
+ mountPath: /export-1
+ ports:
+ - name: http
+ containerPort: 9000
+ - name: http-console
+ containerPort: 9001
+ env:
+ - name: MINIO_ROOT_USER
+ valueFrom:
+ secretKeyRef:
+ name: loki-storage
+ key: rootUser
+ - name: MINIO_ROOT_PASSWORD
+ valueFrom:
+ secretKeyRef:
+ name: loki-storage
+ key: rootPassword
+ - name: MINIO_PROMETHEUS_AUTH_TYPE
+ value: "public"
+ resources:
+ requests:
+ cpu: 100m
+ memory: 128Mi
+ volumes:
+ - name: minio-user
+ secret:
+ secretName: loki-storage
+ volumeClaimTemplates:
+ - metadata:
+ name: export-0
+ spec:
+ accessModes: ["ReadWriteOnce"]
+ resources:
+ requests:
+ storage: 5Gi
+ - metadata:
+ name: export-1
+ spec:
+ accessModes: ["ReadWriteOnce"]
+ resources:
+ requests:
+ storage: 5Gi
+---
+# Source: coder-observability/charts/loki/templates/backend/statefulset-backend.yaml
+# StatefulSet (1 replica, Parallel pod management, serviceName loki-backend-headless)
+# for the Loki "backend" target.
+# Containers:
+# - loki-sc-rules: sidecar configured via env (METHOD=WATCH, LABEL=loki_rule,
+#   FOLDER=/rules/fake, RESOURCE=both); shares the sc-rules-volume emptyDir.
+# - loki: started with -target=backend; ports http-metrics 3100, grpc 9095,
+#   http-memberlist 7946; readiness is GET /ready.
+# Volumes: ConfigMaps loki and loki-runtime; emptyDirs for /tmp, /rules/fake and
+# /var/loki-ruler-wal; /var/loki is backed by the "data" volumeClaimTemplate (10Gi).
+# persistentVolumeClaimRetentionPolicy is Delete for both whenDeleted and whenScaled.
+# terminationGracePeriodSeconds is 300.
+# NOTE(review): the sidecar has LOG_LEVEL "DEBUG" and loki has -log.level=debug —
+# confirm debug verbosity is wanted outside development.
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+ name: loki-backend
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: backend
+ app.kubernetes.io/part-of: memberlist
+spec:
+ replicas: 1
+ podManagementPolicy: Parallel
+ updateStrategy:
+ rollingUpdate:
+ partition: 0
+ serviceName: loki-backend-headless
+ revisionHistoryLimit: 10
+ persistentVolumeClaimRetentionPolicy:
+ whenDeleted: Delete
+ whenScaled: Delete
+ selector:
+ matchLabels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: backend
+ template:
+ metadata:
+ annotations:
+ checksum/config: 4dbe50185304244ab527314b7723d048ea4544f97d0a4d8e0192863861811005
+ prometheus.io/scrape: "true"
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: backend
+ app.kubernetes.io/part-of: memberlist
+ spec:
+ serviceAccountName: loki
+ automountServiceAccountToken: true
+ securityContext:
+ fsGroup: 10001
+ runAsGroup: 10001
+ runAsNonRoot: true
+ runAsUser: 10001
+ terminationGracePeriodSeconds: 300
+ containers:
+ - name: loki-sc-rules
+ imagePullPolicy: IfNotPresent
+ env:
+ - name: METHOD
+ value: WATCH
+ - name: LABEL
+ value: "loki_rule"
+ - name: FOLDER
+ value: "/rules/fake"
+ - name: RESOURCE
+ value: "both"
+ - name: WATCH_SERVER_TIMEOUT
+ value: "60"
+ - name: WATCH_CLIENT_TIMEOUT
+ value: "60"
+ - name: LOG_LEVEL
+ value: "DEBUG"
+ volumeMounts:
+ - name: sc-rules-volume
+ mountPath: "/rules/fake"
+ - name: loki
+ imagePullPolicy: IfNotPresent
+ args:
+ - -config.file=/etc/loki/config/config.yaml
+ - -target=backend
+ - -legacy-read-mode=false
+ - -log.level=debug
+ ports:
+ - name: http-metrics
+ containerPort: 3100
+ protocol: TCP
+ - name: grpc
+ containerPort: 9095
+ protocol: TCP
+ - name: http-memberlist
+ containerPort: 7946
+ protocol: TCP
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
+ readinessProbe:
+ httpGet:
+ path: /ready
+ port: http-metrics
+ initialDelaySeconds: 30
+ timeoutSeconds: 1
+ volumeMounts:
+ - name: config
+ mountPath: /etc/loki/config
+ - name: runtime-config
+ mountPath: /etc/loki/runtime-config
+ - name: tmp
+ mountPath: /tmp
+ - name: data
+ mountPath: /var/loki
+ - name: sc-rules-volume
+ mountPath: "/rules/fake"
+ - mountPath: /var/loki-ruler-wal
+ name: ruler-wal
+ resources: {}
+ affinity:
+ podAntiAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ - labelSelector:
+ matchLabels:
+ app.kubernetes.io/component: backend
+ topologyKey: kubernetes.io/hostname
+ volumes:
+ - name: tmp
+ emptyDir: {}
+ - name: config
+ configMap:
+ name: loki
+ items:
+ - key: "config.yaml"
+ path: "config.yaml"
+ - name: runtime-config
+ configMap:
+ name: loki-runtime
+ - name: sc-rules-volume
+ emptyDir: {}
+ - emptyDir: {}
+ name: ruler-wal
+ volumeClaimTemplates:
+ - apiVersion: v1
+ kind: PersistentVolumeClaim
+ metadata:
+ name: data
+ spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: "10Gi"
+---
+# Source: coder-observability/charts/loki/templates/chunks-cache/statefulset-chunks-cache.yaml
+# StatefulSet (1 replica, Parallel pod management, serviceName loki-chunks-cache)
+# running memcached with a metrics exporter sidecar.
+# - memcached: client port 11211; args "-m 1024", "-I 5m", "-c 16384", "-u 11211";
+#   memory request and limit are both 1229Mi, CPU request is 500m.
+# - exporter: serves http-metrics on 9150 and is pointed at localhost:11211.
+# Both containers drop all capabilities and use a read-only root filesystem.
+# NOTE(review): the second `name:` under metadata is presumably the label
+# name="memcached-chunks-cache" (it also appears in the selector), not metadata.name.
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+ name: loki-chunks-cache
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: "memcached-chunks-cache"
+ name: "memcached-chunks-cache"
+ annotations: {}
+ namespace: "coder-observability"
+spec:
+ podManagementPolicy: Parallel
+ replicas: 1
+ selector:
+ matchLabels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: "memcached-chunks-cache"
+ name: "memcached-chunks-cache"
+ updateStrategy:
+ type: RollingUpdate
+ serviceName: loki-chunks-cache
+ template:
+ metadata:
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: "memcached-chunks-cache"
+ name: "memcached-chunks-cache"
+ annotations:
+ spec:
+ serviceAccountName: loki
+ securityContext: {}
+ initContainers: []
+ nodeSelector: {}
+ affinity: {}
+ topologySpreadConstraints: []
+ tolerations: []
+ terminationGracePeriodSeconds: 60
+ containers:
+ - name: memcached
+ imagePullPolicy: IfNotPresent
+ resources:
+ limits:
+ memory: 1229Mi
+ requests:
+ cpu: 500m
+ memory: 1229Mi
+ ports:
+ - containerPort: 11211
+ name: client
+ args:
+ - -m 1024
+ - --extended=modern,track_sizes
+ - -I 5m
+ - -c 16384
+ - -v
+ - -u 11211
+ env:
+ envFrom:
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
+ - name: exporter
+ imagePullPolicy: IfNotPresent
+ ports:
+ - containerPort: 9150
+ name: http-metrics
+ args:
+ - "--memcached.address=localhost:11211"
+ - "--web.listen-address=0.0.0.0:9150"
+ resources:
+ limits: {}
+ requests: {}
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
+---
+# Source: coder-observability/charts/loki/templates/results-cache/statefulset-results-cache.yaml
+# StatefulSet (1 replica, Parallel pod management, serviceName loki-results-cache)
+# running memcached with a metrics exporter sidecar.
+# - memcached: client port 11211; args "-m 1024", "-I 5m", "-c 16384", "-u 11211";
+#   memory request and limit are both 1229Mi, CPU request is 500m.
+# - exporter: serves http-metrics on 9150 and is pointed at localhost:11211.
+# Both containers drop all capabilities and use a read-only root filesystem.
+# NOTE(review): the second `name:` under metadata is presumably the label
+# name="memcached-results-cache" (it also appears in the selector), not metadata.name.
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+ name: loki-results-cache
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: "memcached-results-cache"
+ name: "memcached-results-cache"
+ annotations: {}
+ namespace: "coder-observability"
+spec:
+ podManagementPolicy: Parallel
+ replicas: 1
+ selector:
+ matchLabels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: "memcached-results-cache"
+ name: "memcached-results-cache"
+ updateStrategy:
+ type: RollingUpdate
+ serviceName: loki-results-cache
+ template:
+ metadata:
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: "memcached-results-cache"
+ name: "memcached-results-cache"
+ annotations:
+ spec:
+ serviceAccountName: loki
+ securityContext: {}
+ initContainers: []
+ nodeSelector: {}
+ affinity: {}
+ topologySpreadConstraints: []
+ tolerations: []
+ terminationGracePeriodSeconds: 60
+ containers:
+ - name: memcached
+ imagePullPolicy: IfNotPresent
+ resources:
+ limits:
+ memory: 1229Mi
+ requests:
+ cpu: 500m
+ memory: 1229Mi
+ ports:
+ - containerPort: 11211
+ name: client
+ args:
+ - -m 1024
+ - --extended=modern,track_sizes
+ - -I 5m
+ - -c 16384
+ - -v
+ - -u 11211
+ env:
+ envFrom:
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
+ - name: exporter
+ imagePullPolicy: IfNotPresent
+ ports:
+ - containerPort: 9150
+ name: http-metrics
+ args:
+ - "--memcached.address=localhost:11211"
+ - "--web.listen-address=0.0.0.0:9150"
+ resources:
+ limits: {}
+ requests: {}
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
+---
+# Source: coder-observability/charts/loki/templates/write/statefulset-write.yaml
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+ name: loki-write
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: write
+ app.kubernetes.io/part-of: memberlist
+spec:
+ replicas: 1
+ podManagementPolicy: Parallel
+ updateStrategy:
+ rollingUpdate:
+ partition: 0
+ serviceName: loki-write-headless
+ revisionHistoryLimit: 10
+ selector:
+ matchLabels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: write
+ template:
+ metadata:
+ annotations:
+ checksum/config: 4dbe50185304244ab527314b7723d048ea4544f97d0a4d8e0192863861811005
+ prometheus.io/scrape: "true"
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/component: write
+ app.kubernetes.io/part-of: memberlist
+ spec:
+ serviceAccountName: loki
+ automountServiceAccountToken: true
+ enableServiceLinks: true
+ securityContext:
+ fsGroup: 10001
+ runAsGroup: 10001
+ runAsNonRoot: true
+ runAsUser: 10001
+ terminationGracePeriodSeconds: 300
+ containers:
+ - name: loki
+ imagePullPolicy: IfNotPresent
+ args:
+ - -config.file=/etc/loki/config/config.yaml
+ - -target=write
+ - -log.level=debug
+ ports:
+ - name: http-metrics
+ containerPort: 3100
+ protocol: TCP
+ - name: grpc
+ containerPort: 9095
+ protocol: TCP
+ - name: http-memberlist
+ containerPort: 7946
+ protocol: TCP
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
+ readinessProbe:
+ httpGet:
+ path: /ready
+ port: http-metrics
+ initialDelaySeconds: 30
+ timeoutSeconds: 1
+ volumeMounts:
+ - name: config
+ mountPath: /etc/loki/config
+ - name: runtime-config
+ mountPath: /etc/loki/runtime-config
+ - name: data
+ mountPath: /var/loki
+ resources: {}
+ affinity:
+ podAntiAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ - labelSelector:
+ matchLabels:
+ app.kubernetes.io/component: write
+ topologyKey: kubernetes.io/hostname
+ volumes:
+ - name: config
+ configMap:
+ name: loki
+ items:
+ - key: "config.yaml"
+ path: "config.yaml"
+ - name: runtime-config
+ configMap:
+ name: loki-runtime
+ volumeClaimTemplates:
+ - apiVersion: v1
+ kind: PersistentVolumeClaim
+ metadata:
+ name: data
+ spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: "10Gi"
+---
+# Source: coder-observability/charts/prometheus/charts/alertmanager/templates/statefulset.yaml
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+ name: alertmanager
+ labels:
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ namespace: coder-observability
+spec:
+ replicas: 1
+ minReadySeconds: 0
+ revisionHistoryLimit: 10
+ selector:
+ matchLabels:
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/instance: coder-observability
+ serviceName: alertmanager-headless
+ template:
+ metadata:
+ labels:
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/instance: coder-observability
+ annotations:
+ checksum/config: 490f47b0d70495b76347dac06d9734b3074df82e0cc70a914ab7859d725f850b
+ prometheus.io/scrape: "true"
+ spec:
+ automountServiceAccountToken: true
+ serviceAccountName: alertmanager
+ securityContext:
+ fsGroup: 65534
+ runAsGroup: 65534
+ runAsNonRoot: true
+ runAsUser: 65534
+ containers:
+ - name: alertmanager
+ securityContext:
+ runAsGroup: 65534
+ runAsNonRoot: true
+ runAsUser: 65534
+ imagePullPolicy: IfNotPresent
+ env:
+ - name: POD_IP
+ valueFrom:
+ fieldRef:
+ apiVersion: v1
+ fieldPath: status.podIP
+ args:
+ - --storage.path=/alertmanager
+ - --config.file=/etc/alertmanager/alertmanager.yml
+ ports:
+ - name: http
+ containerPort: 9093
+ protocol: TCP
+ livenessProbe:
+ httpGet:
+ path: /
+ port: http
+ readinessProbe:
+ httpGet:
+ path: /
+ port: http
+ resources: {}
+ volumeMounts:
+ - name: config
+ mountPath: /etc/alertmanager
+ - name: storage
+ mountPath: /alertmanager
+ volumes:
+ - name: config
+ configMap:
+ name: alertmanager
+ volumeClaimTemplates:
+ - metadata:
+ name: storage
+ spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 2Gi
+---
+# Source: coder-observability/charts/prometheus/templates/sts.yaml
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+ labels:
+ app.kubernetes.io/component: server
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/part-of: prometheus
+ name: prometheus
+ namespace: coder-observability
+spec:
+ persistentVolumeClaimRetentionPolicy:
+ whenDeleted: Retain
+ whenScaled: Retain
+ serviceName: prometheus-headless
+ selector:
+ matchLabels:
+ app.kubernetes.io/component: server
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/instance: coder-observability
+ replicas: 1
+ revisionHistoryLimit: 10
+ podManagementPolicy: OrderedReady
+ template:
+ metadata:
+ annotations:
+ prometheus.io/scrape: "true"
+ labels:
+ app.kubernetes.io/component: server
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/version: v2.53.1
+ helm.sh/chart: prometheus-25.24.2
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/part-of: prometheus
+ spec:
+ enableServiceLinks: true
+ serviceAccountName: prometheus
+ containers:
+ - name: prometheus-server-configmap-reload
+ imagePullPolicy: "IfNotPresent"
+ args:
+ - --watched-dir=/etc/config
+ - --listen-address=0.0.0.0:9091
+ - --reload-url=http://127.0.0.1:9090/-/reload
+ - --log-level=all
+ - --watch-interval=15s
+ ports:
+ - containerPort: 9091
+ name: metrics
+ livenessProbe:
+ httpGet:
+ path: /healthz
+ port: metrics
+ scheme: HTTP
+ initialDelaySeconds: 2
+ periodSeconds: 10
+ readinessProbe:
+ httpGet:
+ path: /healthz
+ port: metrics
+ scheme: HTTP
+ periodSeconds: 10
+ startupProbe:
+ httpGet:
+ path: /healthz
+ port: metrics
+ scheme: HTTP
+ periodSeconds: 10
+ volumeMounts:
+ - name: config-volume
+ mountPath: /etc/config
+ readOnly: true
+ - name: configmap-reload-alerts
+ mountPath: /etc/config/alerts
+ subPath:
+ readOnly:
+ - name: prometheus-server
+ imagePullPolicy: "IfNotPresent"
+ args:
+ - --storage.tsdb.retention.time=15d
+ - --storage.tsdb.retention.size=10GB
+ - --config.file=/etc/config/prometheus.yml
+ - --storage.tsdb.path=/data
+ - --web.console.libraries=/etc/prometheus/console_libraries
+ - --web.console.templates=/etc/prometheus/consoles
+ - --web.enable-lifecycle
+ - --enable-feature=remote-write-receiver
+ - --log.level=debug
+ ports:
+ - containerPort: 9090
+ readinessProbe:
+ httpGet:
+ path: /-/ready
+ port: 9090
+ scheme: HTTP
+ initialDelaySeconds: 30
+ periodSeconds: 5
+ timeoutSeconds: 4
+ failureThreshold: 3
+ successThreshold: 1
+ livenessProbe:
+ httpGet:
+ path: /-/healthy
+ port: 9090
+ scheme: HTTP
+ initialDelaySeconds: 30
+ periodSeconds: 15
+ timeoutSeconds: 10
+ failureThreshold: 3
+ successThreshold: 1
+ volumeMounts:
+ - name: config-volume
+ mountPath: /etc/config
+ - name: storage-volume
+ mountPath: /data
+ subPath: ""
+ - name: server-alerts
+ mountPath: /etc/config/alerts
+ subPath:
+ readOnly:
+ hostNetwork: false
+ dnsPolicy: ClusterFirst
+ securityContext:
+ fsGroup: 65534
+ runAsGroup: 65534
+ runAsNonRoot: true
+ runAsUser: 65534
+ terminationGracePeriodSeconds: 300
+ volumes:
+ - name: config-volume
+ configMap:
+ name: prometheus
+ - name: configmap-reload-alerts
+ configMap:
+ name: metrics-alerts
+ - name: server-alerts
+ configMap:
+ name: metrics-alerts
+ - name: alerts
+ configMap:
+ name: metrics-alerts
+ volumeClaimTemplates:
+ - apiVersion: v1
+ kind: PersistentVolumeClaim
+ metadata:
+ name: storage-volume
+ spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: "12Gi"
+---
+# Source: coder-observability/templates/statefulset-postgres-exporter.yaml
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+ name: postgres-exporter
+ namespace: coder-observability
+spec:
+ selector:
+ matchLabels:
+ app: postgres-exporter
+ serviceName: postgres-exporter
+ replicas: 1
+ template:
+ metadata:
+ annotations:
+ prometheus.io/scrape: 'true'
+ labels:
+ app: postgres-exporter
+ app.kubernetes.io/name: "database-stats"
+ spec:
+ containers:
+ - name: postgres-exporter
+ args:
+ - --collector.long_running_transactions
+ ports:
+ - containerPort: 9187
+ name: exporter
+ env:
+ - name: DATA_SOURCE_NAME
+ value: 'postgresql://coder@localhost:5432/coder?sslmode=disable'
+ envFrom:
+ - secretRef:
+ name: secret-postgres
+---
+# Source: coder-observability/templates/statefulset-runbook-viewer.yaml
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+ name: runbook-viewer
+ namespace: coder-observability
+spec:
+ selector:
+ matchLabels:
+ app: runbook-viewer
+ serviceName: runbook-viewer
+ replicas: 1
+ template:
+ metadata:
+ annotations:
+ checksum/config: b0c41033d0385ee3d46488f08e85bcef0d939614dcb99194e0c5913dbf0c2c33
+ labels:
+ app: runbook-viewer
+ spec:
+ containers:
+ - name: madness
+ ports:
+ - containerPort: 3000
+ name: madness
+ args:
+ - server
+ volumeMounts:
+ - mountPath: /docs/
+ name: runbooks
+ volumes:
+ - name: runbooks
+ configMap:
+ name: runbooks
+---
+# Source: coder-observability/templates/statefulset-sql-exporter.yaml
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+ name: sql-exporter
+ namespace: coder-observability
+spec:
+ selector:
+ matchLabels:
+ app: sql-exporter
+ serviceName: sql-exporter
+ replicas: 1
+ template:
+ metadata:
+ annotations:
+ prometheus.io/scrape: 'true'
+ checksum/config: 71bb9e7579b6e138ae28c623aa29d72025be00387da6c1b8dd5aa168c96ca1e0
+ labels:
+ app: sql-exporter
+ app.kubernetes.io/name: "database-stats"
+ spec:
+ containers:
+ - name: sql-exporter
+ args:
+ - -config.file=/cfg/config.yaml
+ ports:
+ - containerPort: 9399
+ name: exporter
+ volumeMounts:
+ - mountPath: /cfg/
+ name: config
+ envFrom:
+ - secretRef:
+ name: secret-postgres
+ volumes:
+ - name: config
+ configMap:
+ name: sql-exporter-config
+---
+# Source: coder-observability/charts/loki/templates/tests/test-canary.yaml
+apiVersion: v1
+kind: Pod
+metadata:
+ name: "loki-helm-test"
+ namespace: coder-observability
+ labels:
+ app.kubernetes.io/name: loki
+ app.kubernetes.io/instance: coder-observability
+ app.kubernetes.io/managed-by: Helm
+ app.kubernetes.io/component: helm-test
+ annotations:
+ "helm.sh/hook": test
+spec:
+ containers:
+ - name: loki-helm-test
+ image: docker.io/grafana/loki-helm-test:ewelch-distributed-helm-chart-17db5ee
+ env:
+ - name: CANARY_SERVICE_ADDRESS
+ value: "http://loki-canary:3500/metrics"
+ - name: CANARY_PROMETHEUS_ADDRESS
+ value: ""
+ - name: CANARY_TEST_TIMEOUT
+ value: "1m"
+ args:
+ - -test.v
+ restartPolicy: Never
+---
+# Source: coder-observability/charts/loki/charts/minio/templates/post-install-create-bucket-job.yaml
+apiVersion: batch/v1
+kind: Job
+metadata:
+ name: loki-storage-make-bucket-job
+ namespace: "coder-observability"
+ labels:
+ app: minio-make-bucket-job
+ chart: minio-4.0.15
+ release: coder-observability
+ heritage: Helm
+ annotations:
+ "helm.sh/hook": post-install,post-upgrade
+ "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation
+spec:
+ template:
+ metadata:
+ labels:
+ app: minio-job
+ release: coder-observability
+ app.kubernetes.io/name: loki-storage
+ spec:
+ restartPolicy: OnFailure
+ volumes:
+ - name: minio-configuration
+ projected:
+ sources:
+ - configMap:
+ name: loki-storage
+ - secret:
+ name: loki-storage
+ serviceAccountName: minio-sa
+ containers:
+ - name: minio-mc
+ imagePullPolicy: IfNotPresent
+ command: ["/bin/sh", "/config/initialize"]
+ env:
+ - name: MINIO_ENDPOINT
+ value: loki-storage
+ - name: MINIO_PORT
+ value: "9000"
+ volumeMounts:
+ - name: minio-configuration
+ mountPath: /config
+ resources:
+ requests:
+ memory: 128Mi
+---
+# Source: coder-observability/charts/loki/charts/minio/templates/post-install-create-user-job.yaml
+apiVersion: batch/v1
+kind: Job
+metadata:
+ name: loki-storage-make-user-job
+ namespace: "coder-observability"
+ labels:
+ app: minio-make-user-job
+ chart: minio-4.0.15
+ release: coder-observability
+ heritage: Helm
+ annotations:
+ "helm.sh/hook": post-install,post-upgrade
+ "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation
+spec:
+ template:
+ metadata:
+ labels:
+ app: minio-job
+ release: coder-observability
+ app.kubernetes.io/name: loki-storage
+ spec:
+ restartPolicy: OnFailure
+ volumes:
+ - name: minio-configuration
+ projected:
+ sources:
+ - configMap:
+ name: loki-storage
+ - secret:
+ name: loki-storage
+ serviceAccountName: minio-sa
+ containers:
+ - name: minio-mc
+ imagePullPolicy: IfNotPresent
+ command: ["/bin/sh", "/config/add-user"]
+ env:
+ - name: MINIO_ENDPOINT
+ value: loki-storage
+ - name: MINIO_PORT
+ value: "9000"
+ volumeMounts:
+ - name: minio-configuration
+ mountPath: /config
+ resources:
+ requests:
+ memory: 128Mi
diff --git a/scripts/check-unstaged.sh b/scripts/check-unstaged.sh
new file mode 100755
index 0000000..ba39211
--- /dev/null
+++ b/scripts/check-unstaged.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+# Exits with status 1 when git reports modified or untracked files in the
+# current directory tree; the check itself is check_unstaged from lib.sh.
+
+# Strict mode, as in the other scripts in this directory, so that a failure to
+# source lib.sh stops the script immediately.
+set -euo pipefail
+
+source "$(dirname "${BASH_SOURCE[0]}")/lib.sh"
+
+check_unstaged
\ No newline at end of file
diff --git a/scripts/compile.sh b/scripts/compile.sh
new file mode 100755
index 0000000..a00ce4f
--- /dev/null
+++ b/scripts/compile.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# check versions
+HELM_VERSION=3.17
+YQ_VERSION=4.42
+[[ "$(helm version)" == *v${HELM_VERSION}* ]] || { echo "Expected helm version v${HELM_VERSION} but got $(helm version)" >&2; exit 1; }
+[[ "$(yq --version)" == *v${YQ_VERSION}* ]] || { echo "Expected yq version v${YQ_VERSION} but got $(yq --version)" >&2; exit 1; }
+
+source "$(dirname "${BASH_SOURCE[0]}")/lib.sh"
+
+helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+helm repo add grafana https://grafana.github.io/helm-charts
+helm --repository-cache /tmp/cache repo update
+# Check for unexpected changes.
+# Helm dependencies are versioned using ^ which accepts minor & patch changes:
+# e.g. ^1.2.3 is equivalent to >= 1.2.3 < 2.0.0
+helm dependency update coder-observability/
+# We *expect* that the versions will change in the rendered template output, so we ignore those, but
+# if there are changes to the manifests themselves then we need to fail the build to force manual review.
+helm template --namespace coder-observability -f coder-observability/values.yaml coder-observability coder-observability/ | \
+ yq e 'del(.spec.template.spec.containers[].image, .metadata.labels."helm.sh/chart", .metadata.labels."app.kubernetes.io/version")' - \
+ > compiled/resources.yaml
+
+check_unstaged "compiled"
\ No newline at end of file
diff --git a/scripts/lib.sh b/scripts/lib.sh
new file mode 100755
index 0000000..db924f7
--- /dev/null
+++ b/scripts/lib.sh
@@ -0,0 +1,42 @@
+# check_unstaged [PATHSPEC]
+#
+# Asks git for modified and untracked (not ignored) files under PATHSPEC
+# (default: "."). When there are any, prints the file list to stdout and the
+# diffs to stderr, then terminates the calling shell with status 1. Returns
+# normally when git reports nothing.
+function check_unstaged() {
+  local pathspec="${1:-.}"
+  local listing file
+  local -a files
+
+  # Assigned separately from `local` so that a failing git (for example outside
+  # a repository) is noticed instead of being read as "no changes".
+  if ! listing="$(git ls-files --other --modified --exclude-standard -- "$pathspec")"; then
+    echo >&2 "Unable to list unstaged changes."
+    exit 1
+  fi
+
+  if [[ "$listing" != "" ]]; then
+    mapfile -t files <<<"$listing"
+
+    echo
+    echo "The following files contain unstaged changes:"
+    echo
+    for file in "${files[@]}"; do
+      echo " - $file"
+    done
+
+    echo
+    echo "These are the changes:"
+    echo
+    for file in "${files[@]}"; do
+      # "--" marks $file as a path; without it git aborts with "ambiguous
+      # argument" for files that were deleted from the working tree.
+      git --no-pager diff -- "$file" 1>&2
+    done
+
+    echo
+    echo >&2 "Unstaged changes, see above for details."
+    exit 1
+  fi
+}
\ No newline at end of file
diff --git a/scripts/lint-rules.sh b/scripts/lint-rules.sh
new file mode 100755
index 0000000..095330d
--- /dev/null
+++ b/scripts/lint-rules.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+temp_dir="$(mktemp -d)"
+rules_file="${temp_dir}/rules.yaml"
+helm template coder-o11y coder-observability -f coder-observability/values.yaml --show-only templates/configmap-prometheus-alerts.yaml > ${rules_file}
+
+for key in $(yq e '.data | keys' -o csv ${rules_file} | tr ',' "\n"); do
+ file="${temp_dir}/${key}"
+ echo "=========================== [${file}] ==========================="
+
+ yq e ".data[\"${key}\"]" ${rules_file} > ${file}
+ go run github.com/cloudflare/pint/cmd/pint@latest -l DEBUG lint ${file}
+done
\ No newline at end of file
diff --git a/scripts/publish.sh b/scripts/publish.sh
new file mode 100755
index 0000000..b51878a
--- /dev/null
+++ b/scripts/publish.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+set -euox pipefail
+
+version=$("$(dirname "${BASH_SOURCE[0]}")/version.sh")
+mkdir -p build/helm
+helm package coder-observability --version=${version} --dependency-update --destination build/helm
+gsutil cp gs://helm.coder.com/observability/index.yaml build/helm/index.yaml
+helm repo index build/helm --url https://helm.coder.com/observability --merge build/helm/index.yaml
+gsutil -h "Cache-Control:no-cache,max-age=0" cp build/helm/index.yaml gs://helm.coder.com/observability/
+gsutil -h "Cache-Control:no-cache,max-age=0" cp build/helm/coder-observability-${version}.tgz gs://helm.coder.com/observability/
+gsutil -h "Cache-Control:no-cache,max-age=0" cp artifacthub-repo.yaml gs://helm.coder.com/observability/
+
+echo $version
\ No newline at end of file
diff --git a/scripts/version.sh b/scripts/version.sh
new file mode 100755
index 0000000..488df3a
--- /dev/null
+++ b/scripts/version.sh
@@ -0,0 +1,95 @@
+#!/usr/bin/env bash
+
+# This script prints the version string used by the helm chart, derived from
+# the latest Git tag. Note: the current version printed by this script does NOT
+# include the "v" prefix that is included in the Git tag.
+# The script can also bump the version based on the given argument (major, minor,
+# patch); a bumped version is printed in the same form as the latest tag, i.e. it
+# keeps a leading "v" when the tag has one.
+
+set -euo pipefail
+
+# Latest tag in version-sort order; empty when the repository has no tags.
+current_version="$(git tag -l | sort --version-sort | tail -n1)"
+
+# Prints the usage text and exits successfully.
+function help() {
+  echo "$0 [options] [arguments]"
+  echo " "
+  echo "options:"
+  echo "-h, --help show brief help"
+  echo "-c, --current show the current version"
+  echo "-b, --bump bump the version based on the given argument"
+  exit 0
+}
+
+# bump_version PART
+#
+# Prints the version that follows the latest tag when PART ("major", "minor" or
+# "patch") is incremented; lower-order components are reset to 0. With no tags
+# the bump starts from 0.0.0. Any other PART is an error (message on stderr,
+# exit status 1).
+function bump_version() {
+  local part=$1
+  local base="${current_version:-0.0.0}"
+
+  case "$part" in
+  major | minor | patch) ;;
+  *)
+    echo >&2 "Error: Unknown argument $part"
+    exit 1
+    ;;
+  esac
+
+  # A leading "v" is split off before the arithmetic: awk reads a field such as
+  # "v2" as the number 0, which made every major bump of a v-prefixed tag come
+  # out as 1.0.0.
+  echo "$base" | awk -F. -v part="$part" '{
+    prefix = ""
+    if (sub(/^v/, "", $1)) {
+      prefix = "v"
+    }
+    if (part == "major") {
+      printf "%s%d.0.0\n", prefix, $1 + 1
+    } else if (part == "minor") {
+      printf "%s%s.%d.0\n", prefix, $1, $2 + 1
+    } else {
+      printf "%s%s.%s.%d\n", prefix, $1, $2, $3 + 1
+    }
+  }'
+}
+
+# Prints the latest tag without its leading "v".
+function show_current() {
+  echo "${current_version#v}"
+}
+
+# Without arguments the script behaves like --current.
+if [[ $# -eq 0 ]]; then
+  show_current
+fi
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+  -h | --help)
+    help
+    ;;
+  -c | --current)
+    show_current
+    shift
+    ;;
+  -b | --bump)
+    if [[ $# -lt 2 ]]; then
+      echo >&2 "Error: Missing argument for bump"
+      exit 1
+    fi
+    shift
+    bump_version "$1"
+    shift
+    ;;
+  *)
+    echo >&2 "Error: Unknown argument $1"
+    exit 1
+    ;;
+  esac
+done