diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..11c3600 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.tpl linguist-language=go \ No newline at end of file diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 0000000..4433626 --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,33 @@ +name: Lint +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Setup Go + uses: actions/setup-go@v2 + with: + go-version: 1.23 + + - name: Install Helm + uses: azure/setup-helm@v4 + with: + version: v3.17.1 + + - name: Install yq + run: | + sudo wget https://github.com/mikefarah/yq/releases/download/v4.42.1/yq_linux_amd64 -O /usr/bin/yq &&\ + sudo chmod +x /usr/bin/yq + + - name: Lint Helm chart and rules + run: make lint \ No newline at end of file diff --git a/.github/workflows/nightly-build.yaml b/.github/workflows/nightly-build.yaml new file mode 100644 index 0000000..e64f92c --- /dev/null +++ b/.github/workflows/nightly-build.yaml @@ -0,0 +1,48 @@ +name: Nightly build + +on: + schedule: + - cron: '0 0 * * *' + workflow_dispatch: # Allows manual triggering of the workflow + +jobs: + nightly-build: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Setup Go + uses: actions/setup-go@v2 + with: + go-version: 1.22 + + - name: Install Helm + uses: azure/setup-helm@v4 + with: + version: v3.17.1 + + - name: Install yq + run: | + sudo wget https://github.com/mikefarah/yq/releases/download/v4.42.1/yq_linux_amd64 -O /usr/bin/yq &&\ + sudo chmod +x /usr/bin/yq + + - name: make build + run: | + make build > output.log 2>&1 + continue-on-error: false + + - name: Upload script output + uses: actions/upload-artifact@v4 + with: + name: script-output + path: output.log + + - name: Create issue from file on failure + if: failure() 
+ uses: peter-evans/create-issue-from-file@v5 + with: + title: nightly build failure + content-filepath: output.log + assignees: dannykopping \ No newline at end of file diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..23d45a6 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,55 @@ +# GitHub release workflow. +name: publish-helm +on: + push: + tags: + - v* + +permissions: + # Required to publish a release + contents: write + # Necessary to push docker images to ghcr.io. + packages: write + # Necessary for GCP authentication (https://github.com/google-github-actions/setup-gcloud#usage) + id-token: write + +concurrency: ${{ github.workflow }}-${{ github.ref }} + +jobs: + release: + name: Build and publish + runs-on: ubuntu-latest + outputs: + version: ${{ steps.version.outputs.version }} + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # If the event that triggered the build was an annotated tag (which our + # tags are supposed to be), actions/checkout has a bug where the tag in + # question is only a lightweight tag and not a full annotated tag. This + # command seems to fix it. 
+ # https://github.com/actions/checkout/issues/290 + - name: Fetch git tags + run: git fetch --tags --force + + - name: Authenticate to Google Cloud + uses: google-github-actions/auth@v2 + with: + workload_identity_provider: projects/898976630798/locations/global/workloadIdentityPools/coder-ci/providers/github-actions + service_account: coder-observability@coder-customer-releases.iam.gserviceaccount.com + + - name: Setup GCloud SDK + uses: "google-github-actions/setup-gcloud@v2" + + - name: Install helm + uses: azure/setup-helm@v4 + with: + version: v3.9.2 + + - name: Publish Helm Chart + if: ${{ !inputs.dry_run }} + run: | + ./scripts/publish.sh diff --git a/.gitignore b/.gitignore index ee3892e..949b791 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ charts/ +build/ +scratch diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..8bb9049 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,30 @@ +# CHANGELOG + +## v0.3.0 + +- Adding prebuilt workspace dashboard & alerts + +## v0.2.1 + +- Upgraded subcharts + - Loki: upgraded from v6.7.1 -> v6.7.3 +- FIX: `listen-address` duplicate removed in `prometheus-config-reloader` + +## v0.2.0 + +- Upgraded subcharts + - Grafana: upgraded from v7.3.7 -> v7.3.12 + - Prometheus: upgraded from v25.18.0 -> v25.24.1 + - Loki: upgraded from v6.3.4 -> v6.7.1 + +## v0.1.0 + +- Lint Helm chart in CI + +## v0.0.2 -> v0.0.11 + +- Several stability & configurability improvements + +## v0.0.1 + +- Initial release diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 0000000..4521512 --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1 @@ +* @dannykopping \ No newline at end of file diff --git a/LICENSE b/LICENSE index 38bdde7..f7c5d7f 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,121 @@ -MIT License - -Copyright (c) 2024 Coder - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, 
including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +Creative Commons Legal Code + +CC0 1.0 Universal + + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE + LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN + ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS + INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES + REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS + PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM + THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED + HEREUNDER. + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator +and subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). 
+ +Certain owners wish to permanently relinquish those rights to a Work for +the purpose of contributing to a commons of creative, cultural and +scientific works ("Commons") that the public can reliably and without fear +of later claims of infringement build upon, modify, incorporate in other +works, reuse and redistribute as freely as possible in any form whatsoever +and for any purposes, including without limitation commercial purposes. +These owners may contribute to the Commons to promote the ideal of a free +culture and the further production of creative, cultural and scientific +works, or to gain reputation or greater distribution for their Work in +part through the use and efforts of others. + +For these and/or other purposes and motivations, and without any +expectation of additional consideration or compensation, the person +associating CC0 with a Work (the "Affirmer"), to the extent that he or she +is an owner of Copyright and Related Rights in the Work, voluntarily +elects to apply CC0 to the Work and publicly distribute the Work under its +terms, with knowledge of his or her Copyright and Related Rights in the +Work and the meaning and intended legal effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be + protected by copyright and related or neighboring rights ("Copyright and + Related Rights"). Copyright and Related Rights include, but are not + limited to, the following: + +i. the right to reproduce, adapt, distribute, perform, display, +communicate, and translate a Work; +ii. moral rights retained by the original author(s) and/or performer(s); +iii. publicity and privacy rights pertaining to a person's image or +likeness depicted in a Work; +iv. rights protecting against unfair competition in regards to a Work, +subject to the limitations in paragraph 4(a), below; +v. rights protecting the extraction, dissemination, use and reuse of data +in a Work; +vi. 
database rights (such as those arising under Directive 96/9/EC of the +European Parliament and of the Council of 11 March 1996 on the legal +protection of databases, and under any national implementation +thereof, including any amended or successor version of such +directive); and +vii. other similar, equivalent or corresponding rights throughout the +world based on applicable law or treaty, and any national +implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention + of, applicable law, Affirmer hereby overtly, fully, permanently, + irrevocably and unconditionally waives, abandons, and surrenders all of + Affirmer's Copyright and Related Rights and associated claims and causes + of action, whether now known or unknown (including existing as well as + future claims and causes of action), in the Work (i) in all territories + worldwide, (ii) for the maximum duration provided by applicable law or + treaty (including future time extensions), (iii) in any current or future + medium and for any number of copies, and (iv) for any purpose whatsoever, + including without limitation commercial, advertising or promotional + purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each + member of the public at large and to the detriment of Affirmer's heirs and + successors, fully intending that such Waiver shall not be subject to + revocation, rescission, cancellation, termination, or any other legal or + equitable action to disrupt the quiet enjoyment of the Work by the public + as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason + be judged legally invalid or ineffective under applicable law, then the + Waiver shall be preserved to the maximum extent permitted taking into + account Affirmer's express Statement of Purpose. 
In addition, to the + extent the Waiver is so judged Affirmer hereby grants to each affected + person a royalty-free, non transferable, non sublicensable, non exclusive, + irrevocable and unconditional license to exercise Affirmer's Copyright and + Related Rights in the Work (i) in all territories worldwide, (ii) for the + maximum duration provided by applicable law or treaty (including future + time extensions), (iii) in any current or future medium and for any number + of copies, and (iv) for any purpose whatsoever, including without + limitation commercial, advertising or promotional purposes (the + "License"). The License shall be deemed effective as of the date CC0 was + applied by Affirmer to the Work. Should any part of the License for any + reason be judged legally invalid or ineffective under applicable law, such + partial invalidity or ineffectiveness shall not invalidate the remainder + of the License, and in such case Affirmer hereby affirms that he or she + will not (i) exercise any of his or her remaining Copyright and Related + Rights in the Work or (ii) assert any associated claims and causes of + action with respect to the Work, in either case contrary to Affirmer's + express Statement of Purpose. + +4. Limitations and Disclaimers. + +a. No trademark or patent rights held by Affirmer are waived, abandoned, +surrendered, licensed or otherwise affected by this document. +b. Affirmer offers the Work as-is and makes no representations or +warranties of any kind concerning the Work, express, implied, +statutory or otherwise, including without limitation warranties of +title, merchantability, fitness for a particular purpose, non +infringement, or the absence of latent or other defects, accuracy, or +the present or absence of errors, whether or not discoverable, all to +the greatest extent permissible under applicable law. +c. 
Affirmer disclaims responsibility for clearing rights of other persons +that may apply to the Work or any use thereof, including without +limitation any person's Copyright and Related Rights in the Work. +Further, Affirmer disclaims responsibility for obtaining any necessary +consents, permissions or other rights required for any use of the +Work. +d. Affirmer understands and acknowledges that Creative Commons is not a +party to this document and has no duty or obligation with respect to +this CC0 or use of the Work. \ No newline at end of file diff --git a/Makefile b/Makefile index 1006dfc..3973683 100644 --- a/Makefile +++ b/Makefile @@ -10,16 +10,37 @@ SHELL := bash all: lint .PHONY: all -lint: lint/helm +lint: build lint/helm lint/rules readme + ./scripts/check-unstaged.sh .PHONY: lint lint/helm: lint/helm/coder-observability .PHONY: lint/helm lint/helm/coder-observability: - helm dependency update --skip-refresh coder-observability/ - helm lint --strict --set coder.image.tag=v0.0.1 coder-observability/ + helm lint --strict --set coder.image.tag=v$(shell ./scripts/version.sh) coder-observability/ .PHONY: lint/helm/coder-observability -build: # TODO - echo helm package --version "${VERSION}" \ No newline at end of file +build: + ./scripts/compile.sh +.PHONY: build + +lint/rules: lint/helm/prometheus-rules +.PHONY: lint/rules + +lint/helm/prometheus-rules: + @./scripts/lint-rules.sh + +.PHONY: lint/helm/prometheus-rules + +# Usage: publish-patch, publish-minor, publish-major +# Publishing is handled by GitHub Actions, triggered by tag creation. 
+publish-%: + version=$(shell ./scripts/version.sh --bump $*) && \ + git tag --sign "$$version" -m "Release: $$version" && \ + git push origin tag "$$version" + +readme: + go install github.com/norwoodj/helm-docs/cmd/helm-docs@latest + helm-docs --output-file ../README.md \ + --values-file=values.yaml --chart-search-root=coder-observability --template-files=../README.gotmpl \ No newline at end of file diff --git a/PUBLISH.md b/PUBLISH.md new file mode 100644 index 0000000..6828c04 --- /dev/null +++ b/PUBLISH.md @@ -0,0 +1,4 @@ +# Publishing the Coder Observability Chart + +- make desired changes +- run `make publish-{major|minor|patch}` which creates & pushes a new tag, which kicks off a GH Action to publish the chart \ No newline at end of file diff --git a/README.gotmpl b/README.gotmpl new file mode 100644 index 0000000..411d638 --- /dev/null +++ b/README.gotmpl @@ -0,0 +1,242 @@ + + + +# Coder Observability Chart + +> [!NOTE] +> This Helm chart is in BETA; use with caution + +## Overview + +This chart contains a highly opinionated set of integrations between Grafana, Loki, Prometheus, Alertmanager, and +Grafana Agent. + +Dashboards, alerts, and runbooks are preconfigured for monitoring [Coder](https://coder.com/) installations. + +Out of the box: + +Metrics will be scraped from all pods which have a `prometheus.io/scrape=true` annotation.
+Logs will be scraped from all pods in the Kubernetes cluster. + +## Installation + + + +```bash +helm repo add coder-observability https://helm.coder.com/observability +helm upgrade --install coder-observability coder-observability/coder-observability --version 0.1.1 --namespace coder-observability --create-namespace +``` + +## Requirements + +### General + +- Helm 3.7+ + +### Coder + +
+Kubernetes-based deployments + If your installation is not in a namespace named `coder`, you will need to modify: + +```yaml +global: + coder: + controlPlaneNamespace: + externalProvisionersNamespace: +``` + +
+ +
+Non-Kubernetes deployments (click to expand) + Ensure your Coder installation is accessible to the resources created by this chart. + +Set `global.coder.scrapeMetrics` such that the metrics can be scraped from your installation, e.g.: + +```yaml +global: + coder: + scrapeMetrics: + hostname: your.coder.host + port: 2112 + scrapeInterval: 15s + additionalLabels: + job: coder +``` + +If you would like your logs scraped from a process outside Kubernetes, you need to mount the log file(s) in and +configure Grafana Agent to scrape them; here's an example configuration: + +```yaml +grafana-agent: + agent: + mounts: + extra: + - mountPath: /var/log + name: logs + readOnly: true + controller: + volumes: + extra: + - hostPath: + path: /var/log + name: logs + + extraBlocks: |- + loki.source.file "coder_log" { + targets = [ + {__path__ = "/var/log/coder.log", job="coder"}, + ] + forward_to = [loki.write.loki.receiver] + } +``` + +
+ +Ensure these environment variables are set in your Coder deployment: + +- `CODER_PROMETHEUS_ENABLE=true` +- `CODER_PROMETHEUS_COLLECT_AGENT_STATS=true` +- `CODER_LOGGING_HUMAN=/dev/stderr` (only `human` log format is supported + currently; [issue](https://github.com/coder/observability/issues/8)) + +Ensure these annotations exist on the pods of your Coder & provisioner deployments: + +- `prometheus.io/scrape=true` +- `prometheus.io/port=2112` (ensure this matches the port defined by `CODER_PROMETHEUS_ADDRESS`) + +If you use the [`coder/coder` helm chart](https://github.com/coder/coder/tree/main/helm), you can use the +following: + +```yaml +coder: + podAnnotations: + prometheus.io/scrape: "true" + prometheus.io/port: "2112" +``` + +For more details, see +the [coder documentation on exposing Prometheus metrics](https://coder.com/docs/v2/latest/admin/prometheus). + +### Postgres + +You may configure the Helm chart to monitor your Coder deployment's Postgres server. Ensure that the resources created +by this Helm chart can access your Postgres server. + +Create a secret with your Postgres password and reference it as follows, along with the other connection details: + +```yaml +global: + postgres: + hostname: + port: + database: + username: + mountSecret: +``` + +The secret should be in the form of `PGPASSWORD=`, as this secret will be used to create an environment +variable. + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: pg-secret + namespace: coder-observability +data: + PGPASSWORD: +``` + +
+Postgres metrics (click to expand) + +A tool called [`postgres-exporter`](https://github.com/prometheus-community/postgres_exporter) is used to scrape metrics +from your Postgres server, and you can see the metrics it is exposing as follows: + +```bash +kubectl -n coder-observability port-forward statefulset/postgres-exporter 9187 + +curl http://localhost:9187/metrics +``` + +
+ +### Grafana + +To access Grafana, run: + +```bash +kubectl -n coder-observability port-forward svc/grafana 3000:80 +``` + +And open your web browser to http://localhost:3000/. + +By default, Grafana is configured to allow anonymous access; if you want password authentication, define this in +your `values.yaml`: + +```yaml +grafana: + admin: + existingSecret: grafana-admin + userKey: username + passwordKey: password + grafana.ini: + auth.anonymous: + enabled: false +``` + +You will also need to define a secret as follows: + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: grafana-admin # this matches the "existingSecret" field above +stringData: + username: "" # this matches the "userKey" field above + password: "" # this matches the "passwordKey" field above +``` + +To add an Ingress for Grafana, define this in your `values.yaml`: + +```yaml +grafana: + grafana.ini: + server: + domain: observability.example.com + root_url: "%(protocol)s://%(domain)s/grafana" + serve_from_sub_path: true + ingress: + enabled: true + hosts: + - "observability.example.com" + path: "/" +``` + +## Subcharts + +{{ template "chart.requirementsTable" . }} + +Each subchart can be disabled by setting the `enabled` field to `false`. + +| Subchart | Setting | +|-----------------|-------------------------| +| `grafana` | `grafana.enabled` | +| `grafana-agent` | `grafana-agent.enabled` | +| `loki` | `loki.enabled` | +| `prometheus` | `prometheus.enabled` | + +## Values + +The `global` values are the values which pertain to this chart, while the rest pertain to the subcharts. +These values represent only the values _set_ in this chart. For the full list of available values, please see each +subchart. + +For example, the `grafana.replicas` value is set by this chart by default, and is one of hundreds of available +values which are defined [here](https://github.com/grafana/helm-charts/tree/main/charts/grafana#configuration). + +{{ template "chart.valuesTable" . 
}} + +{{ template "helm-docs.versionFooter" . }} diff --git a/README.md b/README.md index 05e3f80..1a80c26 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,492 @@ + + + # Coder Observability Chart -Requires Helm 3.7+ +> [!NOTE] +> This Helm chart is in BETA; use with caution + +## Overview + +This chart contains a highly opinionated set of integrations between Grafana, Loki, Prometheus, Alertmanager, and +Grafana Agent. + +Dashboards, alerts, and runbooks are preconfigured for monitoring [Coder](https://coder.com/) installations. + +Out of the box: + +Metrics will be scraped from all pods which have a `prometheus.io/scrape=true` annotation.
+Logs will be scraped from all pods in the Kubernetes cluster. + +## Installation + + + +```bash +helm repo add coder-observability https://helm.coder.com/observability +helm upgrade --install coder-observability coder-observability/coder-observability --version 0.1.1 --namespace coder-observability --create-namespace +``` + +## Requirements + +### General + +- Helm 3.7+ + +### Coder + +
+Kubernetes-based deployments + If your installation is not in a namespace named `coder`, you will need to modify: + +```yaml +global: + coder: + controlPlaneNamespace: + externalProvisionersNamespace: +``` + +
+ +
+Non-Kubernetes deployments (click to expand) + Ensure your Coder installation is accessible to the resources created by this chart. + +Set `global.coder.scrapeMetrics` such that the metrics can be scraped from your installation, e.g.: + +```yaml +global: + coder: + scrapeMetrics: + hostname: your.coder.host + port: 2112 + scrapeInterval: 15s + additionalLabels: + job: coder +``` + +If you would like your logs scraped from a process outside Kubernetes, you need to mount the log file(s) in and +configure Grafana Agent to scrape them; here's an example configuration: + +```yaml +grafana-agent: + agent: + mounts: + extra: + - mountPath: /var/log + name: logs + readOnly: true + controller: + volumes: + extra: + - hostPath: + path: /var/log + name: logs + + extraBlocks: |- + loki.source.file "coder_log" { + targets = [ + {__path__ = "/var/log/coder.log", job="coder"}, + ] + forward_to = [loki.write.loki.receiver] + } +``` + +
+ +Ensure these environment variables are set in your Coder deployment: + +- `CODER_PROMETHEUS_ENABLE=true` +- `CODER_PROMETHEUS_COLLECT_AGENT_STATS=true` +- `CODER_LOGGING_HUMAN=/dev/stderr` (only `human` log format is supported + currently; [issue](https://github.com/coder/observability/issues/8)) + +Ensure these annotations exist on the pods of your Coder & provisioner deployments: + +- `prometheus.io/scrape=true` +- `prometheus.io/port=2112` (ensure this matches the port defined by `CODER_PROMETHEUS_ADDRESS`) + +If you use the [`coder/coder` helm chart](https://github.com/coder/coder/tree/main/helm), you can use the +following: + +```yaml +coder: + podAnnotations: + prometheus.io/scrape: "true" + prometheus.io/port: "2112" +``` + +For more details, see +the [coder documentation on exposing Prometheus metrics](https://coder.com/docs/v2/latest/admin/prometheus). + +### Postgres + +You may configure the Helm chart to monitor your Coder deployment's Postgres server. Ensure that the resources created +by this Helm chart can access your Postgres server. + +Create a secret with your Postgres password and reference it as follows, along with the other connection details: + +```yaml +global: + postgres: + hostname: + port: + database: + username: + mountSecret: +``` + +The secret should be in the form of `PGPASSWORD=`, as this secret will be used to create an environment +variable. + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: pg-secret + namespace: coder-observability +data: + PGPASSWORD: +``` + +
+Postgres metrics (click to expand) + +A tool called [`postgres-exporter`](https://github.com/prometheus-community/postgres_exporter) is used to scrape metrics +from your Postgres server, and you can see the metrics it is exposing as follows: + +```bash +kubectl -n coder-observability port-forward statefulset/postgres-exporter 9187 + +curl http://localhost:9187/metrics +``` + +
+ +### Grafana + +To access Grafana, run: + +```bash +kubectl -n coder-observability port-forward svc/grafana 3000:80 +``` + +And open your web browser to http://localhost:3000/. + +By default, Grafana is configured to allow anonymous access; if you want password authentication, define this in +your `values.yaml`: + +```yaml +grafana: + admin: + existingSecret: grafana-admin + userKey: username + passwordKey: password + grafana.ini: + auth.anonymous: + enabled: false +``` + +You will also need to define a secret as follows: + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: grafana-admin # this matches the "existingSecret" field above +stringData: + username: "" # this matches the "userKey" field above + password: "" # this matches the "passwordKey" field above +``` + +To add an Ingress for Grafana, define this in your `values.yaml`: + +```yaml +grafana: + grafana.ini: + server: + domain: observability.example.com + root_url: "%(protocol)s://%(domain)s/grafana" + serve_from_sub_path: true + ingress: + enabled: true + hosts: + - "observability.example.com" + path: "/" +``` + +## Subcharts + +| Repository | Name | Version | +|------------|------|---------| +| https://grafana.github.io/helm-charts | grafana | ~v7.3.7 | +| https://grafana.github.io/helm-charts | grafana-agent(grafana-agent) | ~0.37.0 | +| https://grafana.github.io/helm-charts | loki | ~v6.7.3 | +| https://prometheus-community.github.io/helm-charts | prometheus | ~v25.24.1 | + +Each subchart can be disabled by setting the `enabled` field to `false`. + +| Subchart | Setting | +|-----------------|-------------------------| +| `grafana` | `grafana.enabled` | +| `grafana-agent` | `grafana-agent.enabled` | +| `loki` | `loki.enabled` | +| `prometheus` | `prometheus.enabled` | + +## Values + +The `global` values are the values which pertain to this chart, while the rest pertain to the subcharts. +These values represent only the values _set_ in this chart. 
For the full list of available values, please see each +subchart. + +For example, the `grafana.replicas` value is set by this chart by default, and is one of hundreds of available +values which are defined [here](https://github.com/grafana/helm-charts/tree/main/charts/grafana#configuration). + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| global.coder.alerts | object | `{"coderd":{"groups":{"CPU":{"delay":"10m","enabled":true,"period":"10m","thresholds":{"critical":0.9,"warning":0.8}},"IneligiblePrebuilds":{"delay":"10m","enabled":true,"thresholds":{"notify":1}},"Memory":{"delay":"10m","enabled":true,"thresholds":{"critical":0.9,"warning":0.8}},"Replicas":{"delay":"5m","enabled":true,"thresholds":{"critical":1,"notify":3,"warning":2}},"Restarts":{"delay":"1m","enabled":true,"period":"10m","thresholds":{"critical":3,"notify":1,"warning":2}},"UnprovisionedPrebuiltWorkspaces":{"delay":"10m","enabled":true,"thresholds":{"warn":1}},"WorkspaceBuildFailures":{"delay":"10m","enabled":true,"period":"10m","thresholds":{"critical":10,"notify":2,"warning":5}}}},"enterprise":{"groups":{"Licences":{"delay":"1m","enabled":true,"thresholds":{"critical":1,"warning":0.9}}}},"provisionerd":{"groups":{"Replicas":{"delay":"5m","enabled":true,"thresholds":{"critical":1,"notify":3,"warning":2}}}}}` | alerts for the various aspects of Coder | +| global.coder.coderdSelector | string | `"pod=~`coder.*`, pod!~`.*provisioner.*`"` | series selector for Prometheus/Loki to locate coderd pods. ensure this uses backticks for quotes! | +| global.coder.controlPlaneNamespace | string | `"coder"` | the namespace into which the control plane has been deployed. | +| global.coder.externalProvisionersNamespace | string | `"coder"` | the namespace into which any external provisioners have been deployed. 
| +| global.coder.logFormat | string | `"human"` | | +| global.coder.provisionerdSelector | string | `"pod=~`coder-provisioner.*`"` | series selector for Prometheus/Loki to locate provisioner pods. https://coder.com/docs/v2/latest/admin/provisioners TODO: rename container label in provisioner helm chart to be "provisioner" not "coder" ensure this uses backticks for quotes! | +| global.coder.scrapeMetrics | string | `nil` | use this to scrape metrics from a standalone (set of) coder deployment(s); if using kubernetes, rather add an annotation "prometheus.io/scrape=true" and coder will get automatically scraped; set this value to null and configure coderdSelector to target your coder pods | +| global.coder.workspacesSelector | string | `"namespace=`coder-workspaces`"` | series selector for Prometheus/Loki to locate workspace pods. | +| global.dashboards | object | `{"queryTimeout":900,"refresh":"30s","timerange":"12h"}` | settings for bundled dashboards | +| global.dashboards.queryTimeout | int | `900` | how long until a query in Grafana will timeout after | +| global.dashboards.refresh | string | `"30s"` | how often dashboards should refresh | +| global.dashboards.timerange | string | `"12h"` | how far back dashboards should look | +| global.externalScheme | string | `"http"` | | +| global.externalZone | string | `"svc.cluster.local"` | | +| global.postgres | object | `{"alerts":{"groups":{"Basic":{"delay":"1m","enabled":true},"Connections":{"delay":"5m","enabled":true,"thresholds":{"critical":0.9,"notify":0.5,"warning":0.8}},"Notifications":{"delay":"15m","enabled":true,"thresholds":{"critical":0.9,"notify":0.5,"warning":0.8}}}},"database":"coder","exporter":{"image":"quay.io/prometheuscommunity/postgres-exporter"},"hostname":"localhost","mountSecret":"secret-postgres","password":null,"port":5432,"sslmode":"disable","sslrootcert":null,"username":"coder","volumeMounts":[],"volumes":[]}` | postgres connection information NOTE: these settings are global so we 
can parameterise some values which get rendered by subcharts | +| global.postgres.alerts | object | `{"groups":{"Basic":{"delay":"1m","enabled":true},"Connections":{"delay":"5m","enabled":true,"thresholds":{"critical":0.9,"notify":0.5,"warning":0.8}},"Notifications":{"delay":"15m","enabled":true,"thresholds":{"critical":0.9,"notify":0.5,"warning":0.8}}}}` | alerts for postgres | +| global.telemetry | object | `{"metrics":{"scrape_interval":"15s","scrape_timeout":"12s"}}` | control telemetry collection | +| global.telemetry.metrics | object | `{"scrape_interval":"15s","scrape_timeout":"12s"}` | control metric collection | +| global.telemetry.metrics.scrape_interval | string | `"15s"` | how often the collector will scrape discovered pods | +| global.telemetry.metrics.scrape_timeout | string | `"12s"` | how long a request will be allowed to wait before being canceled | +| global.zone | string | `"svc"` | | +| grafana-agent.agent.clustering.enabled | bool | `false` | | +| grafana-agent.agent.configMap.create | bool | `false` | | +| grafana-agent.agent.configMap.key | string | `"config.river"` | | +| grafana-agent.agent.configMap.name | string | `"collector-config"` | | +| grafana-agent.agent.extraArgs[0] | string | `"--disable-reporting=true"` | | +| grafana-agent.agent.mode | string | `"flow"` | | +| grafana-agent.agent.mounts.dockercontainers | bool | `true` | | +| grafana-agent.agent.mounts.varlog | bool | `true` | | +| grafana-agent.commonRelabellings | string | `"rule {\n source_labels = [\"__meta_kubernetes_namespace\"]\n target_label = \"namespace\"\n}\nrule {\n source_labels = [\"__meta_kubernetes_pod_name\"]\n target_label = \"pod\"\n}\n// coalesce the following labels and pick the first value; we'll use this to define the \"job\" label\nrule {\n source_labels = [\"__meta_kubernetes_pod_label_app_kubernetes_io_component\", \"app\", \"__meta_kubernetes_pod_container_name\"]\n separator = \"/\"\n target_label = \"__meta_app\"\n action = \"replace\"\n regex = 
\"^/*([^/]+?)(?:/.*)?$\" // split by the delimiter if it exists, we only want the first one\n replacement = \"${1}\"\n}\nrule {\n source_labels = [\"__meta_kubernetes_namespace\", \"__meta_kubernetes_pod_label_app_kubernetes_io_name\", \"__meta_app\"]\n separator = \"/\"\n target_label = \"job\"\n}\nrule {\n source_labels = [\"__meta_kubernetes_pod_container_name\"]\n target_label = \"container\"\n}\nrule {\n regex = \"__meta_kubernetes_pod_label_(statefulset_kubernetes_io_pod_name|controller_revision_hash)\"\n action = \"labeldrop\"\n}\nrule {\n regex = \"pod_template_generation\"\n action = \"labeldrop\"\n}\nrule {\n source_labels = [\"__meta_kubernetes_pod_phase\"]\n regex = \"Pending|Succeeded|Failed|Completed\"\n action = \"drop\"\n}\nrule {\n source_labels = [\"__meta_kubernetes_pod_node_name\"]\n action = \"replace\"\n target_label = \"node\"\n}\nrule {\n action = \"labelmap\"\n regex = \"__meta_kubernetes_pod_annotation_prometheus_io_param_(.+)\"\n replacement = \"__param_$1\"\n}"` | | +| grafana-agent.controller.podAnnotations."prometheus.io/scrape" | string | `"true"` | | +| grafana-agent.controller.type | string | `"daemonset"` | | +| grafana-agent.crds.create | bool | `false` | | +| grafana-agent.discovery | string | `"// Discover k8s nodes\ndiscovery.kubernetes \"nodes\" {\n role = \"node\"\n}\n\n// Discover k8s pods\ndiscovery.kubernetes \"pods\" {\n role = \"pod\"\n selectors {\n role = \"pod\"\n }\n}"` | | +| grafana-agent.enabled | bool | `true` | | +| grafana-agent.extraBlocks | string | `""` | | +| grafana-agent.fullnameOverride | string | `"grafana-agent"` | | +| grafana-agent.podLogsRelabelRules | string | `""` | | +| grafana-agent.podMetricsRelabelRules | string | `""` | | +| grafana-agent.withOTLPReceiver | bool | `false` | | +| grafana."grafana.ini"."auth.anonymous".enabled | bool | `true` | | +| grafana."grafana.ini"."auth.anonymous".org_name | string | `"Main Org."` | | +| grafana."grafana.ini"."auth.anonymous".org_role | string | 
`"Admin"` | | +| grafana."grafana.ini".analytics.reporting_enabled | bool | `false` | | +| grafana."grafana.ini".dashboards.default_home_dashboard_path | string | `"/var/lib/grafana/dashboards/coder/0/status.json"` | | +| grafana."grafana.ini".dataproxy.timeout | string | `"{{ $.Values.global.dashboards.queryTimeout }}"` | | +| grafana."grafana.ini".feature_toggles.autoMigrateOldPanels | bool | `true` | | +| grafana."grafana.ini".users.allow_sign_up | bool | `false` | | +| grafana.admin.existingSecret | string | `""` | | +| grafana.annotations."prometheus.io/scrape" | string | `"true"` | | +| grafana.dashboardProviders."coder.yaml".apiVersion | int | `1` | | +| grafana.dashboardProviders."coder.yaml".providers[0].disableDeletion | bool | `false` | | +| grafana.dashboardProviders."coder.yaml".providers[0].editable | bool | `false` | | +| grafana.dashboardProviders."coder.yaml".providers[0].folder | string | `"Coder"` | | +| grafana.dashboardProviders."coder.yaml".providers[0].name | string | `"coder"` | | +| grafana.dashboardProviders."coder.yaml".providers[0].options.path | string | `"/var/lib/grafana/dashboards/coder"` | | +| grafana.dashboardProviders."coder.yaml".providers[0].orgId | int | `1` | | +| grafana.dashboardProviders."coder.yaml".providers[0].type | string | `"file"` | | +| grafana.dashboardProviders."coder.yaml".providers[0].updateIntervalSeconds | int | `5` | | +| grafana.dashboardProviders."infra.yaml".apiVersion | int | `1` | | +| grafana.dashboardProviders."infra.yaml".providers[0].disableDeletion | bool | `false` | | +| grafana.dashboardProviders."infra.yaml".providers[0].editable | bool | `false` | | +| grafana.dashboardProviders."infra.yaml".providers[0].folder | string | `"Infrastructure"` | | +| grafana.dashboardProviders."infra.yaml".providers[0].name | string | `"infra"` | | +| grafana.dashboardProviders."infra.yaml".providers[0].options.path | string | `"/var/lib/grafana/dashboards/infra"` | | +| 
grafana.dashboardProviders."infra.yaml".providers[0].orgId | int | `1` | | +| grafana.dashboardProviders."infra.yaml".providers[0].type | string | `"file"` | | +| grafana.dashboardProviders."sidecar.yaml".apiVersion | int | `1` | | +| grafana.dashboardProviders."sidecar.yaml".providers[0].disableDeletion | bool | `false` | | +| grafana.dashboardProviders."sidecar.yaml".providers[0].editable | bool | `false` | | +| grafana.dashboardProviders."sidecar.yaml".providers[0].folder | string | `"Other"` | | +| grafana.dashboardProviders."sidecar.yaml".providers[0].name | string | `"sidecar"` | | +| grafana.dashboardProviders."sidecar.yaml".providers[0].options.path | string | `"/tmp/dashboards"` | | +| grafana.dashboardProviders."sidecar.yaml".providers[0].orgId | int | `1` | | +| grafana.dashboardProviders."sidecar.yaml".providers[0].type | string | `"file"` | | +| grafana.dashboardProviders."sidecar.yaml".providers[0].updateIntervalSeconds | int | `30` | | +| grafana.dashboards.infra.node-exporter-full.datasource | string | `"metrics"` | | +| grafana.dashboards.infra.node-exporter-full.gnetId | int | `1860` | | +| grafana.dashboards.infra.node-exporter-full.revision | int | `36` | | +| grafana.dashboards.infra.postgres-database.datasource | string | `"metrics"` | | +| grafana.dashboards.infra.postgres-database.gnetId | int | `9628` | | +| grafana.dashboards.infra.postgres-database.revision | int | `7` | | +| grafana.datasources."datasources.yaml".apiVersion | int | `1` | | +| grafana.datasources."datasources.yaml".datasources[0].access | string | `"proxy"` | | +| grafana.datasources."datasources.yaml".datasources[0].editable | bool | `false` | | +| grafana.datasources."datasources.yaml".datasources[0].isDefault | bool | `true` | | +| grafana.datasources."datasources.yaml".datasources[0].name | string | `"metrics"` | | +| grafana.datasources."datasources.yaml".datasources[0].timeout | string | `"{{ add $.Values.global.dashboards.queryTimeout 5 }}"` | | +| 
grafana.datasources."datasources.yaml".datasources[0].type | string | `"prometheus"` | | +| grafana.datasources."datasources.yaml".datasources[0].uid | string | `"prometheus"` | | +| grafana.datasources."datasources.yaml".datasources[0].url | string | `"http://prometheus.{{ .Release.Namespace }}.{{ $.Values.global.zone }}"` | | +| grafana.datasources."datasources.yaml".datasources[1].access | string | `"proxy"` | | +| grafana.datasources."datasources.yaml".datasources[1].editable | bool | `false` | | +| grafana.datasources."datasources.yaml".datasources[1].isDefault | bool | `false` | | +| grafana.datasources."datasources.yaml".datasources[1].name | string | `"logs"` | | +| grafana.datasources."datasources.yaml".datasources[1].timeout | string | `"{{ add $.Values.global.dashboards.queryTimeout 5 }}"` | | +| grafana.datasources."datasources.yaml".datasources[1].type | string | `"loki"` | | +| grafana.datasources."datasources.yaml".datasources[1].uid | string | `"loki"` | | +| grafana.datasources."datasources.yaml".datasources[1].url | string | `"http://loki-gateway.{{ .Release.Namespace }}.{{ $.Values.global.zone }}"` | | +| grafana.datasources."datasources.yaml".datasources[2].editable | bool | `false` | | +| grafana.datasources."datasources.yaml".datasources[2].isDefault | bool | `false` | | +| grafana.datasources."datasources.yaml".datasources[2].jsonData.sslmode | string | `"{{ .Values.global.postgres.sslmode }}"` | | +| grafana.datasources."datasources.yaml".datasources[2].name | string | `"postgres"` | | +| grafana.datasources."datasources.yaml".datasources[2].secureJsonData.password | string | `"{{ if .Values.global.postgres.password }}{{ .Values.global.postgres.password }}{{ else }}$PGPASSWORD{{ end }}"` | | +| grafana.datasources."datasources.yaml".datasources[2].timeout | string | `"{{ add $.Values.global.dashboards.queryTimeout 5 }}"` | | +| grafana.datasources."datasources.yaml".datasources[2].type | string | `"postgres"` | | +| 
grafana.datasources."datasources.yaml".datasources[2].uid | string | `"postgres"` | | +| grafana.datasources."datasources.yaml".datasources[2].url | string | `"{{ .Values.global.postgres.hostname }}:{{ .Values.global.postgres.port }}"` | | +| grafana.datasources."datasources.yaml".datasources[2].user | string | `"{{ .Values.global.postgres.username }}"` | | +| grafana.deploymentStrategy.type | string | `"Recreate"` | | +| grafana.enabled | bool | `true` | | +| grafana.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION | bool | `true` | | +| grafana.extraConfigmapMounts[0].configMap | string | `"dashboards-status"` | | +| grafana.extraConfigmapMounts[0].mountPath | string | `"/var/lib/grafana/dashboards/coder/0"` | | +| grafana.extraConfigmapMounts[0].name | string | `"dashboards-status"` | | +| grafana.extraConfigmapMounts[0].readOnly | bool | `false` | | +| grafana.extraConfigmapMounts[1].configMap | string | `"dashboards-coderd"` | | +| grafana.extraConfigmapMounts[1].mountPath | string | `"/var/lib/grafana/dashboards/coder/1"` | | +| grafana.extraConfigmapMounts[1].name | string | `"dashboards-coderd"` | | +| grafana.extraConfigmapMounts[1].readOnly | bool | `false` | | +| grafana.extraConfigmapMounts[2].configMap | string | `"dashboards-provisionerd"` | | +| grafana.extraConfigmapMounts[2].mountPath | string | `"/var/lib/grafana/dashboards/coder/2"` | | +| grafana.extraConfigmapMounts[2].name | string | `"dashboards-provisionerd"` | | +| grafana.extraConfigmapMounts[2].readOnly | bool | `false` | | +| grafana.extraConfigmapMounts[3].configMap | string | `"dashboards-workspaces"` | | +| grafana.extraConfigmapMounts[3].mountPath | string | `"/var/lib/grafana/dashboards/coder/3"` | | +| grafana.extraConfigmapMounts[3].name | string | `"dashboards-workspaces"` | | +| grafana.extraConfigmapMounts[3].readOnly | bool | `false` | | +| grafana.extraConfigmapMounts[4].configMap | string | `"dashboards-workspace-detail"` | | +| grafana.extraConfigmapMounts[4].mountPath | 
string | `"/var/lib/grafana/dashboards/coder/4"` | | +| grafana.extraConfigmapMounts[4].name | string | `"dashboards-workspace-detail"` | | +| grafana.extraConfigmapMounts[4].readOnly | bool | `false` | | +| grafana.extraConfigmapMounts[5].configMap | string | `"dashboards-prebuilds"` | | +| grafana.extraConfigmapMounts[5].mountPath | string | `"/var/lib/grafana/dashboards/coder/5"` | | +| grafana.extraConfigmapMounts[5].name | string | `"dashboards-prebuilds"` | | +| grafana.extraConfigmapMounts[5].readOnly | bool | `false` | | +| grafana.fullnameOverride | string | `"grafana"` | | +| grafana.image.tag | string | `"10.4.19"` | | +| grafana.persistence.enabled | bool | `true` | | +| grafana.persistence.size | string | `"10Gi"` | | +| grafana.replicas | int | `1` | | +| grafana.service.enabled | bool | `true` | | +| grafana.sidecar.dashboards.enabled | bool | `false` | | +| grafana.sidecar.dashboards.labelValue | string | `"1"` | | +| grafana.sidecar.dashboards.provider.allowUiUpdates | bool | `true` | | +| grafana.sidecar.dashboards.provider.disableDelete | bool | `true` | | +| grafana.testFramework.enabled | bool | `false` | | +| grafana.useStatefulSet | bool | `true` | | +| loki.backend.extraArgs[0] | string | `"-log.level=debug"` | | +| loki.backend.extraVolumeMounts[0].mountPath | string | `"/var/loki-ruler-wal"` | | +| loki.backend.extraVolumeMounts[0].name | string | `"ruler-wal"` | | +| loki.backend.extraVolumes[0].emptyDir | object | `{}` | | +| loki.backend.extraVolumes[0].name | string | `"ruler-wal"` | | +| loki.backend.podAnnotations."prometheus.io/scrape" | string | `"true"` | | +| loki.backend.replicas | int | `1` | | +| loki.chunksCache.allocatedMemory | int | `1024` | | +| loki.enabled | bool | `true` | | +| loki.enterprise.adminApi.enabled | bool | `false` | | +| loki.enterprise.enabled | bool | `false` | | +| loki.enterprise.useExternalLicense | bool | `false` | | +| loki.fullnameOverride | string | `"loki"` | | +| loki.gateway.replicas | int | 
`1` | | +| loki.loki.auth_enabled | bool | `false` | | +| loki.loki.commonConfig.path_prefix | string | `"/var/loki"` | | +| loki.loki.commonConfig.replication_factor | int | `1` | | +| loki.loki.rulerConfig.alertmanager_url | string | `"http://alertmanager.{{ .Release.Namespace }}.{{ .Values.global.zone}}"` | | +| loki.loki.rulerConfig.enable_alertmanager_v2 | bool | `true` | | +| loki.loki.rulerConfig.enable_api | bool | `true` | | +| loki.loki.rulerConfig.remote_write.clients.fake.headers.Source | string | `"Loki"` | | +| loki.loki.rulerConfig.remote_write.clients.fake.remote_timeout | string | `"30s"` | | +| loki.loki.rulerConfig.remote_write.clients.fake.url | string | `"http://prometheus.{{ .Release.Namespace }}.{{ .Values.global.zone}}/api/v1/write"` | | +| loki.loki.rulerConfig.remote_write.enabled | bool | `true` | | +| loki.loki.rulerConfig.ring.kvstore.store | string | `"inmemory"` | | +| loki.loki.rulerConfig.rule_path | string | `"/rules"` | | +| loki.loki.rulerConfig.storage.local.directory | string | `"/rules"` | | +| loki.loki.rulerConfig.storage.type | string | `"local"` | | +| loki.loki.rulerConfig.wal.dir | string | `"/var/loki-ruler-wal"` | | +| loki.loki.schemaConfig.configs[0].from | string | `"2024-04-01"` | | +| loki.loki.schemaConfig.configs[0].index.period | string | `"24h"` | | +| loki.loki.schemaConfig.configs[0].index.prefix | string | `"index_"` | | +| loki.loki.schemaConfig.configs[0].object_store | string | `"s3"` | | +| loki.loki.schemaConfig.configs[0].schema | string | `"v13"` | | +| loki.loki.schemaConfig.configs[0].store | string | `"tsdb"` | | +| loki.lokiCanary.annotations."prometheus.io/scrape" | string | `"true"` | | +| loki.lokiCanary.enabled | bool | `true` | | +| loki.minio.address | string | `"loki-storage.{{ .Release.Namespace }}.{{ .Values.global.zone}}:9000"` | | +| loki.minio.enabled | bool | `true` | | +| loki.minio.fullnameOverride | string | `"loki-storage"` | | +| loki.minio.podAnnotations."prometheus.io/path" | 
string | `"/minio/v2/metrics/cluster"` | | +| loki.minio.podAnnotations."prometheus.io/scrape" | string | `"true"` | | +| loki.minio.podLabels."app.kubernetes.io/name" | string | `"loki-storage"` | | +| loki.monitoring.dashboards.enabled | bool | `true` | | +| loki.monitoring.selfMonitoring.enabled | bool | `false` | | +| loki.monitoring.selfMonitoring.grafanaAgent.installOperator | bool | `false` | | +| loki.nameOverride | string | `"loki"` | | +| loki.read.podAnnotations."prometheus.io/scrape" | string | `"true"` | | +| loki.read.replicas | int | `1` | | +| loki.resultsCache.allocatedMemory | int | `1024` | | +| loki.sidecar.rules.folder | string | `"/rules/fake"` | | +| loki.sidecar.rules.logLevel | string | `"DEBUG"` | | +| loki.test.canaryServiceAddress | string | `"http://loki-canary:3500/metrics"` | | +| loki.test.enabled | bool | `true` | | +| loki.write.extraArgs[0] | string | `"-log.level=debug"` | | +| loki.write.podAnnotations."prometheus.io/scrape" | string | `"true"` | | +| loki.write.replicas | int | `1` | | +| prometheus.alertmanager.enabled | bool | `true` | | +| prometheus.alertmanager.fullnameOverride | string | `"alertmanager"` | | +| prometheus.alertmanager.podAnnotations."prometheus.io/scrape" | string | `"true"` | | +| prometheus.alertmanager.service.port | int | `80` | | +| prometheus.configmapReload.prometheus.containerPort | int | `9091` | | +| prometheus.configmapReload.prometheus.extraArgs.log-level | string | `"all"` | | +| prometheus.configmapReload.prometheus.extraArgs.watch-interval | string | `"15s"` | | +| prometheus.configmapReload.prometheus.extraConfigmapMounts[0].configMap | string | `"metrics-alerts"` | | +| prometheus.configmapReload.prometheus.extraConfigmapMounts[0].mountPath | string | `"/etc/config/alerts"` | | +| prometheus.configmapReload.prometheus.extraConfigmapMounts[0].name | string | `"alerts"` | | +| prometheus.configmapReload.prometheus.extraConfigmapMounts[0].readonly | bool | `true` | | +| prometheus.enabled | 
bool | `true` | | +| prometheus.kube-state-metrics.enabled | bool | `true` | | +| prometheus.kube-state-metrics.fullnameOverride | string | `"kube-state-metrics"` | | +| prometheus.kube-state-metrics.podAnnotations."prometheus.io/scrape" | string | `"true"` | | +| prometheus.prometheus-node-exporter.enabled | bool | `true` | | +| prometheus.prometheus-node-exporter.fullnameOverride | string | `"node-exporter"` | | +| prometheus.prometheus-node-exporter.podAnnotations."prometheus.io/scrape" | string | `"true"` | | +| prometheus.prometheus-pushgateway.enabled | bool | `false` | | +| prometheus.server.extraArgs."log.level" | string | `"debug"` | | +| prometheus.server.extraConfigmapMounts[0].configMap | string | `"metrics-alerts"` | | +| prometheus.server.extraConfigmapMounts[0].mountPath | string | `"/etc/config/alerts"` | | +| prometheus.server.extraConfigmapMounts[0].name | string | `"alerts"` | | +| prometheus.server.extraConfigmapMounts[0].readonly | bool | `true` | | +| prometheus.server.extraFlags[0] | string | `"web.enable-lifecycle"` | | +| prometheus.server.extraFlags[1] | string | `"enable-feature=remote-write-receiver"` | | +| prometheus.server.fullnameOverride | string | `"prometheus"` | | +| prometheus.server.global.evaluation_interval | string | `"30s"` | | +| prometheus.server.persistentVolume.enabled | bool | `true` | | +| prometheus.server.persistentVolume.size | string | `"12Gi"` | | +| prometheus.server.podAnnotations."prometheus.io/scrape" | string | `"true"` | | +| prometheus.server.replicaCount | int | `1` | | +| prometheus.server.retentionSize | string | `"10GB"` | | +| prometheus.server.service.type | string | `"ClusterIP"` | | +| prometheus.server.statefulSet.enabled | bool | `true` | | +| prometheus.serverFiles."prometheus.yml".rule_files[0] | string | `"/etc/config/alerts/*.yaml"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs | list | `[]` | | +| prometheus.testFramework.enabled | bool | `false` | | +| runbookViewer.image | 
string | `"dannyben/madness"` | | +| sqlExporter.image | string | `"burningalchemist/sql_exporter"` | | -**NOTE:** this Helm chart is not yet stable; do not use it. \ No newline at end of file diff --git a/artifacthub-repo.yaml b/artifacthub-repo.yaml new file mode 100644 index 0000000..dbbc45c --- /dev/null +++ b/artifacthub-repo.yaml @@ -0,0 +1,8 @@ +# This file is uploaded to GCS at helm.coder.com/observability/artifacthub-repo.yml +# and used by ArtifactHub to verify the repository. +repositoryID: 167a0393-cb7e-4f42-af79-02f8a91915f5 +owners: + - name: colin + email: colin@coder.com + - name: Danny Kopping + email: danny@coder.com \ No newline at end of file diff --git a/coder-observability/Chart.lock b/coder-observability/Chart.lock index c574782..1782a88 100644 --- a/coder-observability/Chart.lock +++ b/coder-observability/Chart.lock @@ -1,15 +1,15 @@ dependencies: - name: grafana repository: https://grafana.github.io/helm-charts - version: 7.3.7 + version: 7.3.12 - name: prometheus repository: https://prometheus-community.github.io/helm-charts - version: 25.18.0 + version: 25.24.2 - name: loki repository: https://grafana.github.io/helm-charts - version: 6.3.4 + version: 6.7.4 - name: grafana-agent repository: https://grafana.github.io/helm-charts version: 0.37.0 -digest: sha256:bf2593a78b3934ec78ffcd527947a64d8a7f223912a89d8f6c57ab8f4c4c12a1 -generated: "2024-04-24T14:44:26.109564+02:00" +digest: sha256:05e0dae0200cabf5cb9e2cfb18a4e166fcaceefaf39827addff4299b18c31d4e +generated: "2025-01-16T07:54:38.036598102Z" diff --git a/coder-observability/Chart.yaml b/coder-observability/Chart.yaml index be0d981..9e40bfa 100644 --- a/coder-observability/Chart.yaml +++ b/coder-observability/Chart.yaml @@ -4,28 +4,24 @@ description: Gain insights into your Coder deployment type: application version: 0.1.0 -appVersion: "2.9.2" dependencies: - name: grafana - alias: dashboards - condition: dashboards.enabled + condition: grafana.enabled repository: 
https://grafana.github.io/helm-charts - version: v7.3.7 + version: '~v7.3.7' - name: prometheus - alias: metrics - condition: metrics.enabled + condition: prometheus.enabled repository: https://prometheus-community.github.io/helm-charts - version: v25.18.0 + version: '~v25.24.1' - name: loki - alias: logs - condition: logs.enabled + condition: loki.enabled repository: https://grafana.github.io/helm-charts - version: v6.3.4 + version: '~v6.7.3' - name: grafana-agent - alias: collector - condition: collector.enabled + alias: grafana-agent + condition: grafana-agent.enabled repository: https://grafana.github.io/helm-charts - version: 0.37.0 + version: '~0.37.0' maintainers: - name: Coder Technologies, Inc. url: https://github.com/coder/observability/issues @@ -37,4 +33,6 @@ keywords: - cde sources: - https://github.com/coder/observability -icon: https://helm.coder.com/coder_logo_black.png \ No newline at end of file +icon: https://helm.coder.com/coder_logo_black.png +annotations: + artifacthub.io/category: monitoring-logging \ No newline at end of file diff --git a/coder-observability/runbooks/coderd.md b/coder-observability/runbooks/coderd.md new file mode 100644 index 0000000..4a42444 --- /dev/null +++ b/coder-observability/runbooks/coderd.md @@ -0,0 +1,135 @@ +# Coderd Runbooks + +## CoderdCPUUsage + +The CPU usage of one or more Coder pods has been close to the limit defined for +the deployment. This can cause slowness in the application, workspaces becoming +unavailable, and may lead to the application failing its liveness probes and +being restarted. + +To resolve this issue, increase the CPU limits of the Coder deployment. + +If you find this occurring frequently, you may wish to check your Coder +deployment against [Coder's Reference Architectures](https://coder.com/docs/v2/latest/admin/architectures). + +## CoderdMemoryUsage + +The memory usage of one or more Coder pods has been close to the limit defined +for the deployment. 
When the memory usage exceeds the limit, the pod(s) will be +restarted by Kubernetes. This will interrupt all connections to workspaces being +handled by the affected pod(s). + +To resolve this issue, increase the memory limits of the Coder deployment. + +If you find this occurring frequently, check the memory usage over a longer +period of time. If it appears to be increasing monotonically, this is likely a +memory leak and should be considered a bug. + +## CoderdRestarts + +One or more Coder pods have been restarting multiple times in the last 10 +minutes. This may be due to a number of issues, including: + +- Failure to connect to the configured database: Coder requires a reachable + PostgreSQL database to function. If it fails to connect, you will see an error + similar to the following: + + ```console + [warn] ping postgres: retrying error="dial tcp 10.43.94.60:5432: connect: connection refused" try=3 + ``` + +- Out-Of-Memory (OOM) kills due to memory usage (see [above](#coderdmemoryusage)), +- An unexpected bug causing the application to exit with an error. + +If Coder is not restarting due to excessive memory usage, check the logs: + +1. Check the logs of the deployment for any errors, + +```console +kubectl -n <namespace> logs deployment/coder --previous +``` + +2. Check any Kubernetes events related to the deployment, + +```console +kubectl -n <namespace> events --watch +``` + +## CoderdReplicas + +One or more Coderd replicas are down. This may cause availability problems and elevated +response times for user and agent API calls. + +To resolve this issue, review the Coder deployment for possible `CrashLoopBackOff` +instances or re-adjust alarm levels based on the actual number of replicas. + +## CoderdWorkspaceBuildFailures + +A few workspace build errors have been recently observed. + +Review Prometheus metrics to identify failed jobs. Check the workspace build logs +to determine if there is a relationship with a new template version or a buggy +Terraform plugin. 
+ +## CoderdLicenseSeats + +Your Enterprise license is approaching or has exceeded the number of seats purchased. + +Please contact your Coder sales contact, or visit https://coder.com/contact/sales. + +## CoderdIneligiblePrebuilds + +Prebuilds only become eligible to be claimed by users once the workspace's agent is a) running and b) all of its startup +scripts have completed. + +If a prebuilt workspace is not eligible, view its agent logs to diagnose the problem. + +## CoderdUnprovisionedPrebuiltWorkspaces + +The number of running prebuilt workspaces is lower than the desired instances. This could be for several reasons, +ordered by likelihood: + +### Experiment/License + +The prebuilds feature is currently gated behind an experiment *and* a premium license. + +Ensure that the prebuilds experiment is enabled with `CODER_EXPERIMENTS=workspace-prebuilds`, and that you have a premium +license added. + +### Preset Validation Issue + +Templates which have prebuilds configured will require a configured preset defined, with ALL of the required parameters +set in the preset. If any of these are missing, or any of the parameters - as defined - fail validation, then the prebuilds +subsystem will refuse to attempt a workspace build. + +Consult the coderd logs for more information; look out for errors or warnings from the prebuilds subsystem. + +### Template Misconfiguration or Error + +Prebuilt workspaces cannot be provisioned due to some issue at `terraform apply`-time. This could be due to misconfigured +cloud resources, improper authorization, or any number of other issues. + +Visit the Workspaces page, change the search term to `owner:prebuilds`, and view the previously failed builds. The +error will likely be quite obvious. + +### Provisioner Latency + +If your provisioners are overloaded and cannot process provisioner jobs quickly enough, prebuilt workspaces may be affected. +There is no prioritization at present for prebuilt workspace jobs. 
+ +Ensure your provisioners are appropriately resourced (i.e. you have enough instances) to handle the concurrent build demand. + +### Use of Workspace Tags + +If you are using `coder_workspace_tags` ([docs](https://coder.com/docs/admin/templates/extending-templates/workspace-tags)) +in your template, chances are you do not have any provisioners running or they are under-resourced (see **Provisioner Latency**). + +Ensure your running provisioners are configured with your desired tags. + +### Reconciliation Loop Issue + +The prebuilds subsystem runs a _reconciliation loop_ which monitors the state of prebuilt workspaces to ensure the desired +number of instances are present at all times. Workspace Prebuilds is currently a BETA feature and so there could be a bug +in this _reconciliation loop_, which should be reported to Coder. + +Examine your coderd logs for any errors or warnings relating to prebuilds. \ No newline at end of file diff --git a/coder-observability/runbooks/postgres.md b/coder-observability/runbooks/postgres.md new file mode 100644 index 0000000..155d848 --- /dev/null +++ b/coder-observability/runbooks/postgres.md @@ -0,0 +1,44 @@ +# Postgres Runbooks + +## PostgresNotificationQueueFillingUp + +Postgres offers asynchronous notification via the `LISTEN` and `NOTIFY` +commands. Coder depends heavily on this async notification mechanism for routine +functionality. + +This may be due to a session executing `LISTEN()` and entering a long +transaction. 
To verify: + +- Check active sessions with `SELECT * FROM pg_stat_activity;`, +- Check the database log for the PID of the session that is preventing cleanup, +- Kill the query: `SELECT pg_terminate_backend(<pid>);` + +For more information, see the PostgreSQL documentation available here: + +- [PostgreSQL documentation on `LISTEN`](https://www.postgresql.org/docs/current/sql-listen.html) +- [PostgreSQL documentation on `NOTIFY`](https://www.postgresql.org/docs/current/sql-notify.html) + +## PostgresDown + +Postgres is not currently running, which means the Coder control plane will not be able to read or write any state. +Workspaces may continue to work normally but it is recommended to get Postgres back up as quickly as possible. + +## PostgresConnectionsRunningLow + +PostgreSQL has a `max_connections` setting that determines the maximum number of +concurrent connections. Once this connection limit is reached, no new +connections will be possible. + +To increase the maximum number of concurrent connections, update the `max_connections` +configuration option for your PostgreSQL instance. See the PostgreSQL +documentation for more details. + +**Note:** You may also need to adjust `shared_buffers` after increasing +`max_connections`. Additionally, you may also need to adjust the kernel +configuration value `kernel.shmmax` in `/etc/sysctl.conf` / +`/etc/sysctl.d`. + +For more information, see: + +- [PostgreSQL Documentation: Server Configuration](https://www.postgresql.org/docs/16/runtime-config-file-locations.html) +- [Tuning your PostgreSQL Server](https://wiki.postgresql.org/wiki/Tuning_Your_PostgreSQL_Server) diff --git a/coder-observability/runbooks/provisionerd.md b/coder-observability/runbooks/provisionerd.md new file mode 100644 index 0000000..9cb0e84 --- /dev/null +++ b/coder-observability/runbooks/provisionerd.md @@ -0,0 +1,9 @@ +# Provisionerd Runbooks + +## ProvisionerdReplicas + +One or more Provisioner replicas are down. 
Workspace builds may be queued and processed slower. + +To resolve this issue, review the Coder deployment (Coder provisioner pods) +for possible `CrashLoopBackOff` instances or re-adjust alarm levels based on the actual +number of replicas. diff --git a/coder-observability/templates/_collector-config.tpl b/coder-observability/templates/_collector-config.tpl new file mode 100644 index 0000000..555065c --- /dev/null +++ b/coder-observability/templates/_collector-config.tpl @@ -0,0 +1,337 @@ +{{- define "collector-config" -}} +{{ $agent := (index .Values "grafana-agent") }} + +{{ $agent.logging }} +{{ $agent.discovery }} + +discovery.relabel "pod_logs" { + targets = discovery.kubernetes.pods.targets + {{ $agent.commonRelabellings | nindent 2 }} + rule { + source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] + separator = "/" + action = "replace" + replacement = "/var/log/pods/*$1/*.log" + target_label = "__path__" + } + rule { + action = "replace" + source_labels = ["__meta_kubernetes_pod_container_id"] + regex = "^(\\w+):\\/\\/.+$" + replacement = "$1" + target_label = "tmp_container_runtime" + } + {{- if $agent.podLogsRelabelRules -}} + {{ $agent.podLogsRelabelRules | trim | nindent 2 }} + {{- end }} +} + +discovery.relabel "pod_metrics" { + targets = discovery.kubernetes.pods.targets + {{ $agent.commonRelabellings | nindent 6 }} + // drop ports that do not expose Prometheus metrics, but might otherwise be exposed by a container which *also* + // exposes an HTTP port which exposes metrics + rule { + source_labels = ["__meta_kubernetes_pod_container_port_name"] + regex = "grpc|http-(memberlist|console)" + action = "drop" + } + // adapted from the Prometheus helm chart + // https://github.com/prometheus-community/helm-charts/blob/862870fc3c847e32479b509e511584d5283126a3/charts/prometheus/values.yaml#L1070 + rule { + source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_scrape"] + action = "keep" + regex = "true" + } + rule 
{ + source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_scheme"] + action = "replace" + regex = "(https?)" + target_label = "__scheme__" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_path"] + action = "replace" + target_label = "__metrics_path__" + regex = "(.+)" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_port", "__meta_kubernetes_pod_ip"] + action = "replace" + regex = "(\\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})" + replacement = "[$2]:$1" + target_label = "__address__" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_port", "__meta_kubernetes_pod_ip"] + action = "replace" + regex = "(\\d+);((([0-9]+?)(\\.|$)){4})" + replacement = "$2:$1" + target_label = "__address__" + } + {{- if $agent.podMetricsRelabelRules -}} + {{ $agent.podMetricsRelabelRules | trim | nindent 2 }} + {{- end }} +} + +local.file_match "pod_logs" { + path_targets = discovery.relabel.pod_logs.output +} + +loki.source.file "pod_logs" { + targets = local.file_match.pod_logs.targets + forward_to = [loki.process.pod_logs.receiver] +} + +loki.process "pod_logs" { + stage.match { + selector = "{tmp_container_runtime=\"containerd\"}" + // the cri processing stage extracts the following k/v pairs: log, stream, time, flags + stage.cri {} + // Set the extract flags and stream values as labels + stage.labels { + values = { + flags = "", + stream = "", + } + } + } + + // if the label tmp_container_runtime from above is docker parse using docker + stage.match { + selector = "{tmp_container_runtime=\"docker\"}" + // the docker processing stage extracts the following k/v pairs: log, stream, time + stage.docker {} + + // Set the extract stream value as a label + stage.labels { + values = { + stream = "", + } + } + } + + // drop the temporary container runtime label as it is no longer needed + stage.label_drop { + values = ["tmp_container_runtime"] + } + + // parse Coder logs and extract 
level & logger for efficient filtering + stage.match { + selector = "{pod=~\"coder.*\"}" // TODO: make configurable + + stage.multiline { + firstline = {{ printf `^(?P<ts>\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}\.\d{3})` | quote }} + max_wait_time = "10s" + } + + stage.regex { + expression = {{ printf `^(?P<ts>\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}\.\d{3})\s\[(?P<level>\w+)\]\s\s(?P<logger>[^:]+):\s(?P<line>.+)` | quote }} // named groups ts, level and logger are consumed by stage.timestamp and stage.labels below + } + + stage.timestamp { + source = "ts" + format = "2006-01-02 15:04:05.000" + action_on_failure = "fudge" // rather have inaccurate time than drop the log line + } + + stage.labels { + values = { + level = "", + logger = "", + } + } + } + + forward_to = [loki.write.loki.receiver] +} +{{ if $agent.extraBlocks -}} +{{ $agent.extraBlocks }} +{{- end }} +loki.write "loki" { + endpoint { + url = "http://{{ include "loki.fullname" .Subcharts.loki }}-gateway.{{ .Release.Namespace }}.{{ .Values.global.zone }}/loki/api/v1/push" + } +} + +prometheus.scrape "pods" { + targets = discovery.relabel.pod_metrics.output + forward_to = [prometheus.relabel.pods.receiver] + + scrape_interval = "{{ .Values.global.telemetry.metrics.scrape_interval }}" + scrape_timeout = "{{ .Values.global.telemetry.metrics.scrape_timeout }}" +} + +// These are metric_relabel_configs while discovery.relabel are relabel_configs. 
+// See https://github.com/grafana/agent/blob/main/internal/converter/internal/prometheusconvert/prometheusconvert.go#L95-L106 +prometheus.relabel "pods" { + forward_to = [prometheus.remote_write.default.receiver] + + // Drop kube-state-metrics' labels which clash with ours + rule { + source_labels = ["__name__", "container"] + regex = "kube_pod.+;(.+)" + target_label = "container" + replacement = "" + } + rule { + source_labels = ["__name__", "pod"] + regex = "kube_pod.+;(.+)" + target_label = "pod" + replacement = "" + } + rule { + source_labels = ["__name__", "namespace"] + regex = "kube_pod.+;(.+)" + target_label = "namespace" + replacement = "" + } + rule { + source_labels = ["__name__", "exported_container"] + // don't replace an empty label + regex = "^kube_pod.+;(.+)$" + target_label = "container" + replacement = "$1" + } + rule { + source_labels = ["__name__", "exported_pod"] + // don't replace an empty label + regex = "^kube_pod.+;(.+)$" + target_label = "pod" + replacement = "$1" + } + rule { + source_labels = ["__name__", "exported_namespace"] + // don't replace an empty label + regex = "^kube_pod.+;(.+)$" + target_label = "namespace" + replacement = "$1" + } + rule { + regex = "^(exported_.*|image_.*|container_id|id|uid)$" + action = "labeldrop" + } +} + +discovery.relabel "cadvisor" { + targets = discovery.kubernetes.nodes.targets + rule { + replacement = "/metrics/cadvisor" + target_label = "__metrics_path__" + } +} + +prometheus.scrape "cadvisor" { + targets = discovery.relabel.cadvisor.output + forward_to = [ prometheus.relabel.cadvisor.receiver ] + scheme = "https" + tls_config { + insecure_skip_verify = true + } + bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token" + scrape_interval = "{{ .Values.global.telemetry.metrics.scrape_interval }}" + scrape_timeout = "{{ .Values.global.telemetry.metrics.scrape_timeout }}" +} + +prometheus.relabel "cadvisor" { + forward_to = [ prometheus.remote_write.default.receiver ] + + // Drop 
empty container labels, addressing https://github.com/google/cadvisor/issues/2688 + rule { + source_labels = ["__name__","container"] + separator = "@" + regex = "(container_cpu_.*|container_fs_.*|container_memory_.*)@" + action = "drop" + } + // Drop empty image labels, addressing https://github.com/google/cadvisor/issues/2688 + rule { + source_labels = ["__name__","image"] + separator = "@" + regex = "(container_cpu_.*|container_fs_.*|container_memory_.*|container_network_.*)@" + action = "drop" + } + // Drop irrelevant series + rule { + source_labels = ["container"] + regex = "^POD$" + action = "drop" + } + // Drop unnecessary labels + rule { + source_labels = ["id"] + target_label = "id" + replacement = "" + } + rule { + source_labels = ["job"] + target_label = "job" + replacement = "" + } + rule { + source_labels = ["name"] + target_label = "name" + replacement = "" + } +} + +prometheus.remote_write "default" { + endpoint { + url ="http://{{ include "prometheus.server.fullname" .Subcharts.prometheus }}.{{ .Release.Namespace }}.{{ .Values.global.zone }}/api/v1/write" + + // drop instance label which unnecessarily adds new series when pods are restarted, since pod IPs are dynamically assigned + // NOTE: "__address__" is mapped to "instance", so will contain <host>:<port> + write_relabel_config { + regex = "instance" + action = "labeldrop" + } + } +} + +{{- if $agent.withOTLPReceiver }}{{/* no right-trim here: keep the newline that follows so the first otelcol block starts on its own line */}} +otelcol.receiver.otlp "otlp_receiver" { + grpc { + endpoint = "0.0.0.0:4317" + } + http { + endpoint = "0.0.0.0:4318" + } + output { + metrics = [otelcol.processor.batch.default.input] + logs = [otelcol.processor.batch.default.input] + } +} +otelcol.exporter.prometheus "to_prometheus" { + forward_to = [ + prometheus.remote_write.default.receiver, + ] +} +otelcol.exporter.loki "to_loki" { + forward_to = [ + loki.write.loki.receiver, + ] +} +otelcol.processor.batch "default" { + output { + metrics = [otelcol.exporter.prometheus.to_prometheus.input] + logs = 
[otelcol.exporter.loki.to_loki.input] + } +} +{{- end -}} + +{{ with .Values.global.coder.scrapeMetrics }} +prometheus.scrape "coder_metrics" { + targets = [ + {"__address__" = "{{ .hostname }}:{{ .port }}", {{ include "collector-labels" .additionalLabels | trimSuffix "," }}}, + ] + + forward_to = [prometheus.remote_write.default.receiver] + scrape_interval = "{{ .scrapeInterval }}" +} +{{- end }} +{{- end }} + +{{- define "collector-labels" -}} +{{- range $key, $val := . -}} +{{ $key }} = "{{ $val }}", +{{- end -}} +{{ end }} \ No newline at end of file diff --git a/coder-observability/templates/_helpers.tpl b/coder-observability/templates/_helpers.tpl index 9907296..0d8578d 100644 --- a/coder-observability/templates/_helpers.tpl +++ b/coder-observability/templates/_helpers.tpl @@ -61,33 +61,64 @@ Create the name of the service account to use {{- end }} {{- end }} -{{/* -Create the name of the service account to use -*/}} -{{- define "coder-observability.datasources" -}} -apiVersion: 1 -datasources: - - name: prometheus - type: prometheus - url: http://prometheus-server.monitoring.svc.cluster.local - access: proxy - isDefault: true - editable: false - - name: loki - type: loki - url: http://loki-gateway.monitoring.svc.cluster.local - access: proxy - isDefault: false - editable: false +{{/* Postgres connector string */}} +{{- define "postgres-connector-string" -}} +{{- if and .Values.global.postgres.password (eq .Values.global.postgres.sslmode "disable") -}} +postgresql://{{ .Values.global.postgres.username }}:{{ urlquery .Values.global.postgres.password }}@{{ .Values.global.postgres.hostname }}:{{ .Values.global.postgres.port }}/{{ .Values.global.postgres.database }}?sslmode={{ .Values.global.postgres.sslmode }} +{{- else if and .Values.global.postgres.password (ne .Values.global.postgres.sslmode "disable") -}} +{{- if .Values.global.postgres.sslrootcert -}} +postgresql://{{ .Values.global.postgres.username }}:{{ urlquery .Values.global.postgres.password }}@{{ 
.Values.global.postgres.hostname }}:{{ .Values.global.postgres.port }}/{{ .Values.global.postgres.database }}?sslmode={{ .Values.global.postgres.sslmode }}&sslrootcert={{ .Values.global.postgres.sslrootcert }} +{{- else -}} +postgresql://{{ .Values.global.postgres.username }}:{{ urlquery .Values.global.postgres.password }}@{{ .Values.global.postgres.hostname }}:{{ .Values.global.postgres.port }}/{{ .Values.global.postgres.database }}?sslmode={{ .Values.global.postgres.sslmode }} +{{- end -}} +{{- else if and .Values.global.postgres.mountSecret (eq .Values.global.postgres.sslmode "disable") -}} +postgresql://{{ .Values.global.postgres.username }}@{{ .Values.global.postgres.hostname }}:{{ .Values.global.postgres.port }}/{{ .Values.global.postgres.database }}?sslmode={{ .Values.global.postgres.sslmode }} +{{- else if and .Values.global.postgres.mountSecret (ne .Values.global.postgres.sslmode "disable") -}} +{{- if .Values.global.postgres.sslrootcert -}} +postgresql://{{ .Values.global.postgres.username }}@{{ .Values.global.postgres.hostname }}:{{ .Values.global.postgres.port }}/{{ .Values.global.postgres.database }}?sslmode={{ .Values.global.postgres.sslmode }}&sslrootcert={{ .Values.global.postgres.sslrootcert }} +{{- else -}} +postgresql://{{ .Values.global.postgres.username }}@{{ .Values.global.postgres.hostname }}:{{ .Values.global.postgres.port }}/{{ .Values.global.postgres.database }}?sslmode={{ .Values.global.postgres.sslmode }} +{{- end -}} +{{- else -}} +{{ fail "either postgres.password or postgres.mountSecret must be defined" }} +{{- end -}} {{- end }} -{{/* -Postgres connector string -*/}} -{{- define "postgres-connector-string" -}} -postgresql://{{ .Values.global.postgres.username }}:{{ .Values.global.postgres.password }}@{{ .Values.global.postgres.hostname }}:{{ .Values.global.postgres.port }}/postgres?sslmode={{ .Values.global.postgres.sslmode }} +{{/* Postgres secret mount: emits an envFrom/secretRef block when global.postgres.mountSecret is set */}} +{{- define "postgres-secret-mount" -}} +{{ if 
.Values.global.postgres.mountSecret }} +envFrom: + - secretRef: + name: {{ .Values.global.postgres.mountSecret }} +{{ end }} {{- end }} {{/* Postgres Exporter does not export a pubsub usage metric by default, so we add one */}} {{- define "postgres-pubsub-queue-usage-metric-name" -}}pg_pubsub_usage{{- end }} +{{/* Build a runbook URL */}} +{{- define "runbook-url" -}} +{{ $outer := . }} +{{- with .Values.global -}} + {{- .externalScheme }}://runbook-viewer.{{ $outer.Release.Namespace }}.{{ .externalZone }}/{{- $outer.service }}#{{- $outer.alert | lower }} +{{- end }} +{{- end }} + +{{- define "coderd-selector" -}} {{- printf "%s, namespace=`%s`" .Values.global.coder.coderdSelector .Values.global.coder.controlPlaneNamespace -}} {{- end }} +{{- define "provisionerd-selector" -}} {{- printf "%s, namespace=`%s`" .Values.global.coder.provisionerdSelector .Values.global.coder.externalProvisionersNamespace -}} {{- end }} +{{- define "workspaces-selector" -}} {{- .Values.global.coder.workspacesSelector -}} {{- end }} +{{- define "non-workspace-selector" -}} {{- printf "namespace=~`(%s|%s)`" (include "control-plane-namespace" .) (include "external-provisioners-namespace" .) 
-}} {{- end }} +{{- define "control-plane-namespace" -}} {{- .Values.global.coder.controlPlaneNamespace -}} {{- end }} +{{- define "external-provisioners-namespace" -}} {{- .Values.global.coder.externalProvisionersNamespace -}} {{- end }} + +{{/* The collector creates "job" labels in the form <namespace>/<name>/<component> */}} + +{{/* Prometheus job label */}} +{{- define "prometheus-job" -}} {{- printf "%s/%s/%s" .Release.Namespace .Values.prometheus.server.fullnameOverride .Values.prometheus.server.name -}} {{- end }} +{{/* Loki job label */}} +{{- define "loki-job" -}} {{- printf "%s/%s" .Release.Namespace .Values.loki.fullnameOverride -}} {{- end }} +{{/* Grafana Agent job label */}} +{{- define "grafana-agent-job" -}} {{- printf "%s/%s/%s" .Release.Namespace (index .Values "grafana-agent").fullnameOverride "grafana-agent" -}} {{- end }} + +{{- define "dashboard-range" -}} {{ .Values.global.dashboards.timerange }} {{- end }} +{{- define "dashboard-refresh" -}} {{ .Values.global.dashboards.refresh }} {{- end }} \ No newline at end of file diff --git a/coder-observability/templates/configmap-collector.yaml b/coder-observability/templates/configmap-collector.yaml index f3a6dc2..919b089 100644 --- a/coder-observability/templates/configmap-collector.yaml +++ b/coder-observability/templates/configmap-collector.yaml @@ -2,225 +2,7 @@ kind: ConfigMap apiVersion: v1 metadata: - name: {{ .Values.collector.agent.configMap.name }} + name: {{ (index .Values "grafana-agent").agent.configMap.name }} namespace: {{ .Release.Namespace }} data: - config.river: | - logging { - level = "debug" - format = "logfmt" - } - - // read the credentials secret for remote_write authorization - // remote.kubernetes.secret "credentials" { - // namespace = "monitoring" - // name = "primary-credentials-logs" - // } - - discovery.kubernetes "pods" { - role = "pod" - selectors { - role = "pod" - } - } - - discovery.relabel "pod_logs" { - targets = discovery.kubernetes.pods.targets - rule { - source_labels = 
["__meta_kubernetes_namespace"] - target_label = "namespace" - } - rule { - source_labels = ["__meta_kubernetes_pod_name"] - target_label = "pod" - } - rule { - source_labels = ["__meta_kubernetes_pod_container_name"] - target_label = "container" - } - rule { - source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_kubernetes_pod_label_app_kubernetes_io_component"] - separator = "/" - target_label = "job" - } - rule { - source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] - separator = "/" - action = "replace" - replacement = "/var/log/pods/*$1/*.log" - target_label = "__path__" - } - rule { - action = "replace" - source_labels = ["__meta_kubernetes_pod_container_id"] - regex = "^(\\w+):\\/\\/.+$" - replacement = "$1" - target_label = "tmp_container_runtime" - } - } - - // TODO: share common relabelings - discovery.relabel "pod_metrics" { - targets = discovery.kubernetes.pods.targets - - rule { - source_labels = ["__meta_kubernetes_namespace"] - target_label = "namespace" - } - rule { - source_labels = ["__meta_kubernetes_pod_name"] - target_label = "pod" - } - rule { - source_labels = ["__meta_kubernetes_pod_container_name"] - target_label = "container" - } - rule { - source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_kubernetes_pod_label_app_kubernetes_io_component"] - separator = "/" - target_label = "job" - action = "replace" - } - - // adapted from the Prometheus helm chart - // https://github.com/prometheus-community/helm-charts/blob/862870fc3c847e32479b509e511584d5283126a3/charts/prometheus/values.yaml#L1070 - rule { - source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_scrape"] - action = "keep" - regex = "true" - } - - rule { - source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_scheme"] - action = "replace" - regex = "(https?)" - target_label = "__scheme__" - } - - rule { - 
source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_path"] - action = "replace" - target_label = "__metrics_path__" - regex = "(.+)" - } - - rule { - source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_port", "__meta_kubernetes_pod_ip"] - action = "replace" - regex = "(\\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})" - replacement = "[$2]:$1" - target_label = "__address__" - } - - rule { - source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_port", "__meta_kubernetes_pod_ip"] - action = "replace" - regex = "(\\d+);((([0-9]+?)(\\.|$)){4})" - replacement = "$2:$1" - target_label = "__address__" - } - - rule { - action = "labelmap" - regex = "__meta_kubernetes_pod_annotation_prometheus_io_param_(.+)" - replacement = "__param_$1" - } - - rule { - action = "labelmap" - regex = "__meta_kubernetes_pod_label_(.+)" - } - - rule { - source_labels = ["__meta_kubernetes_namespace"] - action = "replace" - target_label = "namespace" - } - - rule { - source_labels = ["__meta_kubernetes_pod_name"] - action = "replace" - target_label = "pod" - } - - rule { - source_labels = ["__meta_kubernetes_pod_phase"] - regex = "Pending|Succeeded|Failed|Completed" - action = "drop" - } - - rule { - source_labels = ["__meta_kubernetes_pod_node_name"] - action = "replace" - target_label = "node" - } - } - - local.file_match "pod_logs" { - path_targets = discovery.relabel.pod_logs.output - } - - loki.source.file "pod_logs" { - targets = local.file_match.pod_logs.targets - forward_to = [loki.process.pod_logs.receiver] - } - - // basic processing to parse the container format. You can add additional processing stages - // to match your application logs. 
- loki.process "pod_logs" { - stage.match { - selector = "{tmp_container_runtime=\"containerd\"}" - // the cri processing stage extracts the following k/v pairs: log, stream, time, flags - stage.cri {} - // Set the extract flags and stream values as labels - stage.labels { - values = { - flags = "", - stream = "", - } - } - } - - // if the label tmp_container_runtime from above is docker parse using docker - stage.match { - selector = "{tmp_container_runtime=\"docker\"}" - // the docker processing stage extracts the following k/v pairs: log, stream, time - stage.docker {} - - // Set the extract stream value as a label - stage.labels { - values = { - stream = "", - } - } - } - - // drop the temporary container runtime label as it is no longer needed - stage.label_drop { - values = ["tmp_container_runtime"] - } - - forward_to = [loki.write.loki.receiver] - } - - // TODO reference release name - loki.write "loki" { - endpoint { - url = "http://{{ include "loki.fullname" .Subcharts.logs }}-gateway.{{ .Release.Namespace }}.svc.cluster.local/loki/api/v1/push" - // basic_auth { - // username = nonsensitive(remote.kubernetes.secret.credentials.data["username"]) - // password = remote.kubernetes.secret.credentials.data["password"] - // } - } - } - - prometheus.scrape "pods" { - targets = discovery.relabel.pod_metrics.output - forward_to = [prometheus.remote_write.default.receiver] - } - - prometheus.remote_write "default" { - endpoint { - url ="http://{{ include "prometheus.server.fullname" .Subcharts.metrics }}.{{ .Release.Namespace }}.svc.cluster.local/api/v1/write" - } - } \ No newline at end of file + config.river: |- {{- include "collector-config" . 
| trim | nindent 4 }} \ No newline at end of file diff --git a/coder-observability/templates/configmap-prometheus-alerts.yaml b/coder-observability/templates/configmap-prometheus-alerts.yaml index d3f35d8..bf9bcc4 100644 --- a/coder-observability/templates/configmap-prometheus-alerts.yaml +++ b/coder-observability/templates/configmap-prometheus-alerts.yaml @@ -2,35 +2,255 @@ apiVersion: v1 kind: ConfigMap metadata: name: metrics-alerts + namespace: {{ .Release.Namespace }} data: - {{ with .Values.global.postgres }} + {{- $service := dict "service" "coderd" -}} + + {{- with .Values.global.coder.alerts.coderd }} {{/* start-section */}} + coderd.yaml: |- + groups: + {{- with .groups.CPU }} + {{- $group := . }} + {{- if .enabled }} + - name: CPU Usage + rules: + {{ $alert := "CoderdCPUUsage" }} + {{- range $severity, $threshold := .thresholds }} + - alert: {{ $alert }} + expr: max by (pod) (rate(container_cpu_usage_seconds_total{ {{- include "coderd-selector" $ -}} }[{{- $group.period -}}])) / max by(pod) (kube_pod_container_resource_limits{ {{- include "coderd-selector" $ -}}, resource="cpu"}) > {{ $threshold }} + for: {{ $group.delay }} + annotations: + summary: The Coder instance {{ `{{ $labels.pod }}` }} is using high amounts of CPU, which may impact application performance. + labels: + severity: {{ $severity }} + runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }} + {{- end }} + {{- end }} + {{- end }} + + {{- with .groups.Memory }} + {{- $group := . 
}} + {{- if .enabled }} + - name: Memory Usage + rules: + {{ $alert := "CoderdMemoryUsage" }} + {{- range $severity, $threshold := .thresholds }} + - alert: {{ $alert }} + expr: max by (pod) (container_memory_working_set_bytes{ {{- include "coderd-selector" $ -}} }) / max by (pod) (kube_pod_container_resource_limits{ {{- include "coderd-selector" $ -}}, resource="memory"}) > {{ $threshold }} + for: {{ $group.delay }} + annotations: + summary: The Coder instance {{ `{{ $labels.pod }}` }} is using high amounts of memory, which may lead to an Out-Of-Memory (OOM) error. + labels: + severity: {{ $severity }} + runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }} + {{- end }} + {{- end }} + {{- end }} + + {{- with .groups.Restarts }} + {{- $group := . }} + {{- if .enabled }} + - name: Pod Restarts + rules: + {{ $alert := "CoderdRestarts" }} + {{- range $severity, $threshold := .thresholds }} + - alert: {{ $alert }} + expr: sum by(pod) (increase(kube_pod_container_status_restarts_total{ {{- include "coderd-selector" $ -}} }[{{- $group.period -}}])) > {{ $threshold }} + for: {{ $group.delay }} + annotations: + summary: The Coder instance {{ `{{ $labels.pod }}` }} has restarted multiple times in the last {{ $group.period -}}, which may indicate a CrashLoop. + labels: + severity: {{ $severity }} + runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }} + {{- end }} + {{- end }} + {{- end }} + + {{- with .groups.Replicas }} + {{- $group := . }} + {{- if .enabled }} + - name: Coderd Replicas + rules: + {{ $alert := "CoderdReplicas" }} + {{- range $severity, $threshold := .thresholds }} + - alert: {{ $alert }} + expr: sum(up{ {{- include "coderd-selector" $ -}} }) < {{ $threshold }} + for: {{ $group.delay }} + annotations: + summary: Number of alive coderd replicas is below the threshold = {{ $threshold -}}. 
+ labels: + severity: {{ $severity }} + runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }} + {{- end }} + {{- end }} + {{- end }} + + {{- with .groups.WorkspaceBuildFailures }} + {{- $group := . }} + {{- if .enabled }} + - name: Coderd Workspace Build Failures + rules: + {{ $alert := "CoderdWorkspaceBuildFailures" }} + {{- range $severity, $threshold := .thresholds }} + - alert: {{ $alert }} + expr: sum(increase(coderd_workspace_builds_total{ {{- include "coderd-selector" $ -}} , status="failed" }[{{- $group.period -}}])) > {{ $threshold }} + for: {{ $group.delay }} + annotations: + summary: Workspace builds have failed multiple times in the last {{ $group.period -}}, which may indicate a broken Coder template. + labels: + severity: {{ $severity }} + runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }} + {{- end }} + {{- end }} + {{- end }} + + {{- with .groups.IneligiblePrebuilds }} + {{- $group := . }} + {{- if .enabled }} + - name: Coderd Ineligible Prebuilds + rules: + {{ $alert := "CoderdIneligiblePrebuilds" }} + {{- range $severity, $threshold := .thresholds }} + - alert: {{ $alert }} + expr: max by (template_name, preset_name) (coderd_prebuilt_workspaces_running - coderd_prebuilt_workspaces_eligible) > 0 + for: {{ $group.delay }} + annotations: + summary: > + {{ `{{ $value }}` }} prebuilt workspace(s) are currently ineligible for claiming for the "{{ `{{ $labels.template_name }}` }}" template and "{{ `{{ $labels.preset_name }}` }}" preset. + This usually indicates that the agent has not started correctly, or is still running its startup scripts after an extended period of time. + labels: + severity: {{ $severity }} + runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }} + {{- end }} + {{- end }} + {{- end }} + + {{- with .groups.UnprovisionedPrebuiltWorkspaces }} + {{- $group := . 
}} + {{- if .enabled }} + - name: Coderd Unprovisioned Prebuilt Workspaces + rules: + {{ $alert := "CoderdUnprovisionedPrebuiltWorkspaces" }} + {{- range $severity, $threshold := .thresholds }} + - alert: {{ $alert }} + expr: max by (template_name, preset_name) (coderd_prebuilt_workspaces_desired - coderd_prebuilt_workspaces_running) > 0 + for: {{ $group.delay }} + annotations: + summary: > + {{ `{{ $value }}` }} prebuilt workspace(s) have not yet been provisioned for the "{{ `{{ $labels.template_name }}` }}" template and "{{ `{{ $labels.preset_name }}` }}" preset. + labels: + severity: {{ $severity }} + runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }} + {{- end }} + {{- end }} + {{- end }} + + {{- end }} {{/* end-section */}} + + {{- $service = dict "service" "provisionerd" -}} {{/* selects the runbook page that runbook-url links to for this section */}} + {{- with .Values.global.coder.alerts.provisionerd }} {{/* start-section */}} + provisionerd.yaml: |- + groups: + {{- with .groups.Replicas }} + {{- $group := . }} + {{- if .enabled }} + - name: Provisionerd Replicas + rules: + {{ $alert := "ProvisionerdReplicas" }} + {{- range $severity, $threshold := .thresholds }} + - alert: {{ $alert }} + expr: sum(coderd_provisionerd_num_daemons{ {{- include "coderd-selector" $ -}} }) < {{ $threshold }} + for: {{ $group.delay }} + annotations: + summary: Number of alive provisionerd replicas is below the threshold = {{ $threshold -}}. + labels: + severity: {{ $severity }} + runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }} + {{- end }} + {{- end }} + {{- end }} + + {{- end }} {{/* end-section */}} + + + {{- $service = dict "service" "enterprise" -}} + + {{- with .Values.global.coder.alerts.enterprise }} {{/* start-section */}} + enterprise.yaml: |- + groups: + {{- with .groups.Licences }} + {{- $group := . 
}} + {{- if .enabled }} + - name: Licences + rules: + {{ $alert := "CoderLicenseSeats" }} + {{- range $severity, $threshold := .thresholds }} + - alert: {{ $alert }} + expr: 'max(coderd_license_active_users) / max(coderd_license_limit_users) >= {{- $threshold }}' + for: {{ $group.delay }} + annotations: + summary: Your Coder enterprise licence usage is now at {{ `{{ $value | humanizePercentage }}` }} capacity. + labels: + severity: {{ $severity }} + runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} {{/* end-section */}} + + {{- $service = dict "service" "postgres" -}} + {{- with .Values.global.postgres }} postgres.yaml: |- + groups: {{- with .alerts.groups.Notifications }} {{- $group := . -}} {{- if .enabled }} - groups: - name: Notifications rules: + {{ $alert := "PostgresNotificationQueueFillingUp" }} {{- range $severity, $threshold := .thresholds }} - - alert: PostgresNotificationQueueFillingUp + - alert: {{ $alert }} expr: {{ include "postgres-pubsub-queue-usage-metric-name" . }} > {{ $threshold }} for: {{ $group.delay }} + annotations: + summary: The postgres instance {{ `{{ $labels.instance }}` }} has a notification that is filling up, which may impact application performance. labels: severity: {{ $severity }} + runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }} {{- end }} {{- end -}} {{- end -}} {{- with .alerts.groups.Basic }} {{ $group := . -}} {{- if .enabled }} - groups: - - name: Basic + - name: Liveness rules: - - alert: PostgresDown + {{ $alert := "PostgresDown" }} + - alert: {{ $alert }} expr: pg_up == 0 for: {{ $group.delay }} + annotations: + summary: The postgres instance {{ `{{ $labels.instance }}` }} is down! 
labels: severity: critical + runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }} {{- end }} {{ end }} - {{ end }} \ No newline at end of file + {{- with .alerts.groups.Connections }} + {{ $group := . -}} + {{- if .enabled }} + - name: Connections + rules: + {{ $alert := "PostgresConnectionsRunningLow" }} + {{- range $severity, $threshold := .thresholds }} + - alert: {{ $alert }} + expr: sum by (datname, instance) (pg_stat_activity_count) > on () group_left() (pg_settings_max_connections * {{ $threshold }}) + for: {{ $group.delay }} + labels: + summary: The postgres instance {{ `{{ $labels.instance }}` }} is running low on connections which may impact application performance. + severity: {{ $severity }} + runbook_url: {{ template "runbook-url" (deepCopy $ | merge (dict "alert" $alert) $service) }} + {{- end }} + {{- end -}} + {{- end -}} + {{ end }} diff --git a/coder-observability/templates/configmap-runbooks.yaml b/coder-observability/templates/configmap-runbooks.yaml new file mode 100644 index 0000000..80eb085 --- /dev/null +++ b/coder-observability/templates/configmap-runbooks.yaml @@ -0,0 +1,10 @@ +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: runbooks + namespace: {{ .Release.Namespace }} + annotations: + checksum/config: {{ (.Files.Glob "runbooks/**").AsConfig | indent 2 | sha256sum }} +data: +{{ (.Files.Glob "runbooks/**").AsConfig | indent 2 }} \ No newline at end of file diff --git a/coder-observability/templates/configmap-sql-exporter.yaml b/coder-observability/templates/configmap-sql-exporter.yaml index e6b3a50..08f6d9c 100644 --- a/coder-observability/templates/configmap-sql-exporter.yaml +++ b/coder-observability/templates/configmap-sql-exporter.yaml @@ -2,6 +2,7 @@ apiVersion: v1 kind: ConfigMap metadata: name: sql-exporter-config + namespace: {{ .Release.Namespace }} data: config.yaml: |- global: @@ -14,9 +15,10 @@ data: - collector_name: notify metrics: # Add a metric to show the current usage of 
the Postgres "pub/sub" mechanism + # See https://www.postgresql.org/docs/current/functions-info.html - metric_name: {{ include "postgres-pubsub-queue-usage-metric-name" . }} type: gauge - help: 'TODO.' + help: "The fraction (0–1) of the asynchronous notification queue's maximum size that is currently occupied by notifications that are waiting to be processed" static_labels: hostname: {{ .Values.global.postgres.hostname }} database: {{ .Values.global.postgres.database }} diff --git a/coder-observability/templates/dashboards/_dashboards_coderd.json.tpl b/coder-observability/templates/dashboards/_dashboards_coderd.json.tpl new file mode 100644 index 0000000..20a0ece --- /dev/null +++ b/coder-observability/templates/dashboards/_dashboards_coderd.json.tpl @@ -0,0 +1,1474 @@ +{{ define "coderd-dashboard.json" }} +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Down" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 10, + "options": { + "colorMode": 
"value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value_and_name", + "wideLayout": false + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(up{ {{- include "coderd-selector" . -}} } == 1) or vector(0)", + "instant": true, + "legendFormat": "Up", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "(count(up{ {{- include "coderd-selector" . -}} } == 0) or vector(0)) > 0", + "hide": false, + "instant": true, + "legendFormat": "Down", + "range": false, + "refId": "B" + } + ], + "title": "Replicas", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 18, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "One or more replicas are required to be running in order to serve the control-plane.\n\nSee [High Availability](https://coder.com/docs/v2/latest/admin/high-availability) for details on how to\nrun multiple `coderd` replicas.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 0.9 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + 
"matcher": { + "id": "byName", + "options": "Enabled" + }, + "properties": [ + { + "id": "mappings", + "value": [ + { + "options": { + "0": { + "index": 1, + "text": "No" + }, + "1": { + "index": 0, + "text": "Yes" + } + }, + "type": "value" + }, + { + "options": { + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + } + ] + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 32, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value_and_name", + "wideLayout": false + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(coderd_license_user_limit_enabled)", + "instant": true, + "legendFormat": "Enabled", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "(\n max(coderd_license_active_users) / max(coderd_license_limit_users)\n) > 0", + "hide": false, + "instant": false, + "legendFormat": "Usage", + "range": true, + "refId": "B" + } + ], + "title": "Enterprise License", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 33, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "If you would like to try Coder's [Enterprise features](https://coder.com/docs/v2/latest/enterprise), you can [request a trial 
license](https://coder.com/docs/v2/latest/faqs#how-do-i-add-an-enterprise-license).", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/(Requested|Limit)/" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 0, + 10 + ], + "fill": "dot" + } + }, + { + "id": "custom.fillOpacity", + "value": 5 + }, + { + "id": "custom.drawStyle", + "value": "line" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Requested" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 6 + }, + "id": 25, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", 
+ "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (pod) (rate(container_cpu_usage_seconds_total{ {{- include "coderd-selector" . -}} }[$__rate_interval]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max(kube_pod_container_resource_limits{ {{- include "coderd-selector" . -}} , resource=\"cpu\"})", + "hide": false, + "instant": false, + "legendFormat": "Limit", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max(kube_pod_container_resource_requests{ {{- include "coderd-selector" . -}} , resource=\"cpu\"})", + "hide": false, + "instant": false, + "legendFormat": "Requested", + "range": true, + "refId": "B" + } + ], + "title": "CPU Usage Seconds", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 6 + }, + "id": 26, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "The cumulative CPU used per core-second. 
If `coderd` was using a full CPU core, that would be represented as 1 second.\n\nRequests & limits are shown if set.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "red", + "mode": "shades" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Requested" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 6 + }, + "id": 30, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum 
by (reason) (\n count_over_time(kube_pod_container_status_terminated_reason{ {{- include "coderd-selector" . -}} }[$__interval])\n)", + "hide": false, + "instant": false, + "legendFormat": {{ printf "{{reason}}" | quote }}, + "range": true, + "refId": "C" + } + ], + "title": "Terminations", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.0001 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 2, + "x": 16, + "y": 6 + }, + "id": 34, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(kube_pod_container_status_restarts_total{ {{- include "coderd-selector" . 
-}} }[$__range]))", + "hide": false, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "B" + } + ], + "title": "Restarts", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 6 + }, + "id": 31, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "Pods can be terminated for several reasons:\n- `OOMKilled`: pod exceeded its defined memory limit or was terminated by the OS for using excessive memory (if no limit defined)\n- `Error`: usually attributable to a configuration problem\n- `Evicted`: pod has been evicted from node for overusing resources and will be rescheduled on another node if possible", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/(Requested|Limit)/" + }, + "properties": [ + 
{ + "id": "custom.lineStyle", + "value": { + "dash": [ + 0, + 10 + ], + "fill": "dot" + } + }, + { + "id": "custom.fillOpacity", + "value": 5 + }, + { + "id": "custom.drawStyle", + "value": "line" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Requested" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 12 + }, + "id": 29, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max by (pod) (container_memory_working_set_bytes{ {{- include "coderd-selector" . -}} })", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max(kube_pod_container_resource_limits{ {{- include "coderd-selector" . -}} , resource=\"memory\"})", + "hide": false, + "instant": false, + "legendFormat": "Limit", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max(kube_pod_container_resource_requests{ {{- include "coderd-selector" . 
-}} , resource=\"memory\"})", + "hide": false, + "instant": false, + "legendFormat": "Requested", + "range": true, + "refId": "B" + } + ], + "title": "RAM Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 12 + }, + "id": 28, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "This shows the total memory used by each `coderd` container; it is the same metric which the [OOM killer](https://www.kernel.org/doc/gorman/html/understand/understand016.html) uses.\n\nRequests & limits are shown if set.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 100 + }, + { + "color": "red", + "value": 500 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Errors" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 12 + }, + "id": 16, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": 
"code", + "expr": "quantile(0.5, coder_pubsub_send_latency_seconds)", + "instant": false, + "legendFormat": "Send", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "quantile(0.5, coder_pubsub_receive_latency_seconds)", + "hide": false, + "instant": false, + "legendFormat": "Receive", + "range": true, + "refId": "B" + } + ], + "title": "Pubsub Latency (Median)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Errors" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 2, + "x": 16, + "y": 12 + }, + "id": 22, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "(\n sum(increase(coder_pubsub_latency_measure_errs_total[$__range]))\n / count(coder_pubsub_latency_measure_errs_total)\n) or vector(0)", + "hide": false, + "instant": false, + "legendFormat": "Errors", + "range": true, + "refId": "B" + } + ], + "title": "Pubsub Errors", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"prometheus" + }, + "description": "", + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 12 + }, + "id": 19, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "`coderd` uses Postgres for passing messages between subcomponents for coordination and signalling;\nthis is called \"pubsub\" (or publish-subscribe).\n\nWe measure the time for messages to be sent and received. Latencies higher than 500ms will likely lead to\nyour Coder deployment feeling sluggish. High latency is usually an indication that your Postgres server is under-resourced on CPU.\n\nHigh values for median should be concerning,\nwhile the 90th percentile shows the outliers.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 100 + }, + { + "color": "red", + "value": 500 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Errors" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 15 + }, + "id": 21, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + 
}, + "editorMode": "code", + "expr": "quantile(0.9, coder_pubsub_send_latency_seconds)", + "instant": false, + "legendFormat": "Send", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "quantile(0.9, coder_pubsub_receive_latency_seconds)", + "hide": false, + "instant": false, + "legendFormat": "Receive", + "range": true, + "refId": "B" + } + ], + "title": "Pubsub Latency (P90)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 0, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 18 + }, + "id": 35, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by(pod) (rate(coderd_api_requests_processed_total{ {{- include "coderd-selector" . 
-}} }[$__rate_interval]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "API Requests", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 18 + }, + "id": 36, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "This shows the number of requests per second each `coderd` replica is handling.\n\nHeavy skewing towards a single `coderd` replica indicates faulty load balancing.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + } + ], + "refresh": "{{- include "dashboard-refresh" . -}}", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-{{- include "dashboard-range" . -}}", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Control Plane", + "uid": "coderd", + "version": 6, + "weekStart": "" +} +{{ end }} \ No newline at end of file diff --git a/coder-observability/templates/dashboards/_dashboards_prebuilds.json.tpl b/coder-observability/templates/dashboards/_dashboards_prebuilds.json.tpl new file mode 100644 index 0000000..938b501 --- /dev/null +++ b/coder-observability/templates/dashboards/_dashboards_prebuilds.json.tpl @@ -0,0 +1,1050 @@ +{{ define "prebuilds-dashboard.json" }} +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 10, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + 
"mappings": [ + { + "options": { + "0": { + "color": "orange", + "index": 2, + "text": "Not enabled" + }, + "1": { + "color": "green", + "index": 0, + "text": "Enabled" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 1, + "text": "Not enabled" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 15, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": { + "valueSize": 15 + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "min(coderd_experiments{experiment=\"workspace-prebuilds\"})", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Experiment enabled?", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "text", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 49, + "interval": "30s", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + 
"wideLayout": true + }, + "pluginVersion": "10.4.3", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(max(coderd_prebuilt_workspaces_desired) by (template_name, preset_name)) or vector(0)", + "instant": true, + "interval": "", + "legendFormat": "Desired", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(max(coderd_prebuilt_workspaces_running) by (template_name, preset_name)) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Running", + "range": false, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(max(coderd_prebuilt_workspaces_eligible) by (template_name, preset_name)) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Eligible", + "range": false, + "refId": "E" + } + ], + "title": "Current: Global", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "text", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 48, + "interval": "30s", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.3", + "repeatDirection": "v", + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(max by (template_name, preset_name) (coderd_prebuilt_workspaces_created_total)) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Created", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(max by (template_name, preset_name) (coderd_prebuilt_workspaces_failed_total)) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Failed", + "range": false, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(max by (template_name, preset_name) (coderd_prebuilt_workspaces_claimed_total)) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Claimed", + "range": false, + "refId": "A" + } + ], + "title": "All Time: Global", + "type": "stat" + }, + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 2, + "panels": [], + "repeat": "template", + "repeatDirection": "h", + "title": "$template", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "text", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 5 + }, + "id": 31, + "interval": "30s", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + 
"wideLayout": true + }, + "pluginVersion": "10.4.3", + "repeat": "preset", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(coderd_prebuilt_workspaces_desired{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)", + "instant": true, + "interval": "", + "legendFormat": "Desired", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(coderd_prebuilt_workspaces_running{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Running", + "range": false, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(coderd_prebuilt_workspaces_eligible{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Eligible", + "range": false, + "refId": "E" + } + ], + "title": "Current: $preset", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 10, + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 18, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + 
"group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Desired" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + }, + { + "id": "custom.fillOpacity", + "value": 85 + }, + { + "id": "custom.fillBelowTo", + "value": "Running" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Running" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + }, + { + "id": "custom.fillBelowTo", + "value": "Eligible" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Eligible" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 4, + "y": 5 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.3", + "repeat": "preset", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max(coderd_prebuilt_workspaces_desired{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)", + "instant": false, + "interval": "", + "legendFormat": "Desired", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max(coderd_prebuilt_workspaces_running{template_name=~\"$template\", preset_name=~\"$preset\"}) or 
vector(0)", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Running", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max(coderd_prebuilt_workspaces_eligible{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Eligible", + "range": true, + "refId": "E" + } + ], + "title": "Pool Capacity: $preset", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 10, + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 13, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Created" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + 
"options": "Desired" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Running" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Eligible" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Claimed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 12, + "y": 5 + }, + "id": 38, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.3", + "repeat": "preset", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "floor(max(increase(coderd_prebuilt_workspaces_created_total{template_name=~\"$template\", preset_name=~\"$preset\"}[$__rate_interval]))) or vector(0)", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Created", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "floor(max(increase(coderd_prebuilt_workspaces_failed_total{template_name=~\"$template\", preset_name=~\"$preset\"}[$__rate_interval]))) or vector(0)", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Failed", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "floor(max(increase(coderd_prebuilt_workspaces_claimed_total{template_name=~\"$template\", 
preset_name=~\"$preset\"}[$__rate_interval]))) or vector(0)", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Claimed", + "range": true, + "refId": "F" + } + ], + "title": "Pool Operations: $preset", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "text", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 5 + }, + "id": 1, + "interval": "30s", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.3", + "repeat": "preset", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(coderd_prebuilt_workspaces_created_total{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Created", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(coderd_prebuilt_workspaces_failed_total{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Failed", + "range": false, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": 
"max(coderd_prebuilt_workspaces_claimed_total{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Claimed", + "range": false, + "refId": "A" + } + ], + "title": "All Time: $preset", + "type": "stat" + } + ], + "refresh": "{{- include "dashboard-refresh" . -}}", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "allValue": "", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(coderd_prebuilt_workspaces_desired,template_name)", + "hide": 0, + "includeAll": false, + "label": "Template", + "multi": false, + "name": "template", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(coderd_prebuilt_workspaces_desired,template_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": "", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(coderd_prebuilt_workspaces_desired{template_name=~\"$template\"},preset_name)", + "hide": 0, + "includeAll": true, + "label": "Preset", + "multi": true, + "name": "preset", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(coderd_prebuilt_workspaces_desired{template_name=~\"$template\"},preset_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-{{- include "dashboard-range" . 
-}}", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Prebuilds", + "uid": "cej6jysyme22oa", + "version": 13, + "weekStart": "" +} +{{ end }} \ No newline at end of file diff --git a/coder-observability/templates/dashboards/_dashboards_provisionerd.json.tpl b/coder-observability/templates/dashboards/_dashboards_provisionerd.json.tpl new file mode 100644 index 0000000..9b855a5 --- /dev/null +++ b/coder-observability/templates/dashboards/_dashboards_provisionerd.json.tpl @@ -0,0 +1,1021 @@ +{{ define "provisionerd-dashboard.json" }} +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 17, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value_and_name", + "wideLayout": false + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(coderd_provisionerd_num_daemons{pod=~`coder.*`, 
pod!~`.*provisioner.*`})", + "instant": true, + "legendFormat": "Built-in", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(coderd_provisionerd_num_daemons{ {{- include "provisionerd-selector" . -}} })", + "hide": false, + "instant": true, + "legendFormat": "External", + "range": false, + "refId": "B" + } + ], + "title": "Provisioners", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 20, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "Provisioners are responsible for building workspaces.\n\n`coderd` runs built-in provisioners by default. Control this with the `CODER_PROVISIONER_DAEMONS` environment variable or `--provisioner-daemons` flag.\n\nYou can also consider [External Provisioners](https://coder.com/docs/v2/latest/admin/provisioners). 
Running both built-in and external provisioners is perfectly valid,\nalthough dedicated (external) provisioners will generally give the best build performance.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 21, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "(sum(coderd_provisionerd_jobs_current) > 0) or vector(0)", + "instant": false, + "legendFormat": "Current", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(coderd_provisionerd_num_daemons)", + "hide": false, + "instant": true, + "legendFormat": "Capacity", + "range": false, + "refId": "B" + } + ], + "title": "Builds", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 22, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "The maximum number of simultaneous builds is equivalent to the number of `provisionerd` daemons 
running.\n\nThe \"Capacity\" panel shows how many simultaneous builds are possible.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 7 + }, + "id": 23, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "histogram_quantile(0.5, sum by(le) (rate(coderd_provisionerd_job_timings_seconds_bucket[$__range])))", + "hide": false, + "instant": true, + "legendFormat": "Median", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "histogram_quantile(0.9, sum by(le) (rate(coderd_provisionerd_job_timings_seconds_bucket[$__range])))", + "hide": false, + "instant": true, + "legendFormat": "90th Percentile", + "range": false, + "refId": "A" + } + ], + "title": "Build Times", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 7 + }, + "id": 24, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "This shows the median and 90th 
percentile workspace build times.\n\nLong build times can impede developers' productivity while they wait for workspaces to start or be created.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + }, + { + "id": "displayName", + "value": "Failure" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + }, + { + "id": "displayName", + "value": "Success" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 7 + }, + "id": 25, + "interval": "1h", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.4.0", + 
"targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (status) (increase(coderd_provisionerd_job_timings_seconds_count[$__interval]))", + "hide": false, + "instant": false, + "interval": "1h", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Build Count Per Hour", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 7 + }, + "id": 26, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "_NOTE: this will not show the current hour._", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/(Limit|Requested)/" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": 
"custom.fillOpacity", + "value": 5 + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 0, + 10 + ], + "fill": "dot" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Requested" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 14 + }, + "id": 28, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (pod) (rate(container_cpu_usage_seconds_total{ {{- include "provisionerd-selector" . -}} }[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(kube_pod_container_resource_limits{ {{- include "provisionerd-selector" . -}} , resource=\"cpu\"})", + "hide": false, + "instant": false, + "legendFormat": "Limit", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(kube_pod_container_resource_requests{ {{- include "provisionerd-selector" . 
-}} , resource=\"cpu\"})", + "hide": false, + "instant": false, + "legendFormat": "Requested", + "range": true, + "refId": "C" + } + ], + "title": "CPU Usage Seconds", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 14 + }, + "id": 30, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "The cumulative CPU used per core-second. If the process was using a full CPU core, that would be represented as 1 second.\n\nRequests & limits are shown if set.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/(Limit|Requested)/" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": "custom.fillOpacity", + "value": 5 + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 0, + 10 + ], + "fill": 
"dot" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Requested" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 14 + }, + "id": 29, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max by (pod) (container_memory_working_set_bytes{ {{- include "provisionerd-selector" . -}} })", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(kube_pod_container_resource_limits{ {{- include "provisionerd-selector" . -}} , resource=\"memory\"})", + "hide": false, + "instant": false, + "legendFormat": "Limit", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(kube_pod_container_resource_requests{ {{- include "provisionerd-selector" . 
-}} , resource=\"memory\"})", + "hide": false, + "instant": false, + "legendFormat": "Requested", + "range": true, + "refId": "C" + } + ], + "title": "RAM Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 14 + }, + "id": 31, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "This shows the total memory used by each container; it is the same metric which the [OOM killer](https://www.kernel.org/doc/gorman/html/understand/understand016.html) uses.\n\nRequests & limits are shown if set.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "gridPos": { + "h": 18, + "w": 18, + "x": 0, + "y": 21 + }, + "id": 27, + "options": { + "dedupStrategy": "exact", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": true, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "editorMode": "code", + "expr": "{ {{- include "non-workspace-selector" . 
-}}, logger=~\"(.*runner|terraform|provisioner.*)\"}", + "queryType": "range", + "refId": "A" + } + ], + "title": "Logs", + "type": "logs" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 21 + }, + "id": 32, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "This panel shows all logs across built-in and [external provisioners](https://coder.com/docs/v2/latest/admin/provisioners).", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + } + ], + "refresh": "{{- include "dashboard-refresh" . -}}", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-{{- include "dashboard-range" . -}}", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Provisioners", + "uid": "provisionerd", + "version": 10, + "weekStart": "" +} +{{ end }} \ No newline at end of file diff --git a/coder-observability/templates/dashboards/_dashboards_status.json.tpl b/coder-observability/templates/dashboards/_dashboards_status.json.tpl new file mode 100644 index 0000000..6a96f7e --- /dev/null +++ b/coder-observability/templates/dashboards/_dashboards_status.json.tpl @@ -0,0 +1,2076 @@ +{{ define "status-dashboard.json" }} +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": false, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 9, + "title": "Application", + "type": "row" + }, + { + "datasource": { + 
"type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Down" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 10, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value_and_name", + "wideLayout": false + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(up{ {{- include "coderd-selector" . -}} } == 1) or vector(0) > 0", + "instant": true, + "legendFormat": "Up", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(up{ {{- include "coderd-selector" . 
-}} } == 0) or vector(0) > 0", + "hide": false, + "instant": true, + "legendFormat": "Down", + "range": false, + "refId": "B" + } + ], + "title": "Coder Replicas", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 16, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value_and_name", + "wideLayout": false + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(coderd_provisionerd_num_daemons{ {{- include "coderd-selector" . -}} })", + "instant": true, + "legendFormat": "Built-in", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(coderd_provisionerd_num_daemons{ {{- include "provisionerd-selector" . 
-}} })", + "hide": false, + "instant": true, + "legendFormat": "External", + "range": false, + "refId": "B" + } + ], + "title": "Provisioners", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + }, + { + "id": "displayName", + "value": "Failed" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + }, + { + "id": "displayName", + "value": "Success" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 17, + "options": { + "displayLabels": [ + "name", + "value" + ], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "round(sum by (status) (increase(coderd_provisionerd_job_timings_seconds_count{pod!=``}[$__range])))", + "instant": true, + "legendFormat": {{ printf "{{status}}" | quote }}, + "range": false, + "refId": "A" + } + ], + "title": "Workspace Builds", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": 
[], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 18, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(kube_pod_status_ready{condition=\"true\", {{ include "workspaces-selector" . -}}} == 1)\nor\ncount(coderd_api_workspace_latest_build{status=\"running\"})\nor\nvector(0)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Running Workspaces", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Down" + }, + "1": { + "color": "green", + "index": 0, + "text": "Up" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + "color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + 
"options": "/.*RAM/" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 16, + "y": 1 + }, + "id": 15, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(\n max_over_time(\n rate(container_cpu_usage_seconds_total{ {{- include "coderd-selector" . -}} }[1h:1m])\n [$__range:]\n )\n)", + "instant": true, + "legendFormat": "Control Plane CPU", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(\n max_over_time(\n rate(container_cpu_usage_seconds_total{ {{- include "provisionerd-selector" . -}} }[1h:1m])\n [$__range:]\n )\n)", + "hide": false, + "instant": true, + "legendFormat": "Provisioner CPU", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(\n max_over_time(\n container_memory_working_set_bytes{ {{- include "coderd-selector" . -}} }\n [$__range:]\n )\n)", + "hide": false, + "instant": true, + "legendFormat": "Control Plane RAM", + "range": false, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(\n max_over_time(\n container_memory_working_set_bytes{ {{- include "provisionerd-selector" . 
-}} }\n [$__range:]\n )\n)", + "hide": false, + "instant": true, + "legendFormat": "Provisioner RAM", + "range": false, + "refId": "D" + } + ], + "title": "Resource Usage High Watermark (Cumulative)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Down" + }, + "1": { + "color": "green", + "index": 0, + "text": "Up" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + "color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 19, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "min(pg_up) or vector(0)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Postgres", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 3, + "panels": [], + "title": "Observability Tools", + "type": "row" + }, 
+ { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Down" + }, + "1": { + "color": "green", + "index": 0, + "text": "Up" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + "color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 9 + }, + "id": 1, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "min(up{job=\"{{- include "prometheus-job" . 
-}}\"}) or vector(0)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Prometheus", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Down" + }, + "1": { + "color": "green", + "index": 0, + "text": "Up" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + "color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 9 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "min(up{job=\"{{- include "loki-job" . 
-}}/write\"}) or vector(0)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Loki Write Path", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Down" + }, + "1": { + "color": "green", + "index": 0, + "text": "Up" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + "color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 9 + }, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "min(up{job=\"{{- include "loki-job" . 
-}}/read\"}) or vector(0)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Loki Read Path", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Down" + }, + "1": { + "color": "green", + "index": 0, + "text": "Up" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + "color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 9 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "min(up{job=\"{{- include "loki-job" . 
-}}/backend\", container=\"loki\"}) or vector(0)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Loki Backend", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Down" + }, + "1": { + "color": "green", + "index": 0, + "text": "Up" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + "color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 9 + }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "min(up{job=\"{{- include "loki-job" . 
-}}/canary\"}) or vector(0)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Loki Canary", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Down" + }, + "1": { + "color": "green", + "index": 0, + "text": "Up" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + "color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 9 + }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "min(up{job=\"{{- include "grafana-agent-job" . 
-}}\"}) or vector(0)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Grafana Agent", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Unhealthy" + }, + "1": { + "color": "green", + "index": 0, + "text": "Healthy" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + "color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 14 + }, + "id": 12, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "prometheus_config_last_reload_successful{job=\"{{- include "prometheus-job" . 
-}}\"}", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Prometheus Config", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Unhealthy" + }, + "1": { + "color": "green", + "index": 0, + "text": "Healthy" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + "color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 14 + }, + "id": 14, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "min(loki_runtime_config_last_reload_successful) or vector(0)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Loki Config", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + 
{ + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Unhealthy" + }, + "1": { + "color": "green", + "index": 0, + "text": "Healthy" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + "color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 14 + }, + "id": 13, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "min(agent_config_last_load_successful{job=\"{{- include "grafana-agent-job" . 
-}}\"}) or vector(0)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Grafana Agent Config", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Retention Limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Write-Ahead Log" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Storage" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#f9f9fb", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 14 + }, + "id": 11, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "(\n prometheus_tsdb_wal_storage_size_bytes{job=\"{{- include "prometheus-job" . -}}\"} +\n prometheus_tsdb_storage_blocks_bytes{job=\"{{- include "prometheus-job" . -}}\"} +\n prometheus_tsdb_symbol_table_size_bytes{job=\"{{- include "prometheus-job" . -}}\"}\n)\n/\nprometheus_tsdb_retention_limit_bytes{job=\"{{- include "prometheus-job" . 
-}}\"}", + "instant": false, + "legendFormat": "Retention limit used", + "range": true, + "refId": "A" + } + ], + "title": "Prometheus Storage", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 14 + }, + "id": 20, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": { + "titleSize": 20, + "valueSize": 35 + }, + "textMode": "auto", + "wideLayout": false + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{namespace=\"{{- .Release.Namespace -}}\", resource=\"cpu\"})", + "hide": false, + "instant": true, + "legendFormat": "Requested", + "range": false, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(\n max_over_time(\n rate(container_cpu_usage_seconds_total{namespace=\"{{- .Release.Namespace -}}\"}[$__rate_interval])\n [$__range:]\n )\n)", + "hide": false, + "instant": true, + "legendFormat": "High Watermark", + "range": false, + "refId": "D" + } + ], + "title": "CPU", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + 
"value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 14 + }, + "id": 21, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "center", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": { + "titleSize": 20, + "valueSize": 35 + }, + "textMode": "value_and_name", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{namespace=\"{{- .Release.Namespace -}}\", resource=\"memory\"})", + "hide": false, + "instant": true, + "legendFormat": "Requested", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(\n max_over_time(container_memory_working_set_bytes{namespace=\"{{- .Release.Namespace -}}\"}[$__range])\n)", + "instant": true, + "legendFormat": "High Watermark", + "range": false, + "refId": "A" + } + ], + "title": "RAM", + "type": "stat" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Status", + "uid": "coder-status", + "version": 1, + "weekStart": "" +} +{{ end }} \ No newline at end of file diff --git a/coder-observability/templates/dashboards/_dashboards_workspace_detail.json.tpl b/coder-observability/templates/dashboards/_dashboards_workspace_detail.json.tpl new file mode 100644 index 0000000..713cc9a --- /dev/null +++ b/coder-observability/templates/dashboards/_dashboards_workspace_detail.json.tpl @@ -0,0 +1,1344 @@ +{{ define "workspace-detail-dashboard.json" }} +{ + "annotations": { + "list": [ + 
{ + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "description": "", + "gridPos": { + "h": 1.2, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 28, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "**HINT**: use the dropdowns above to filter by specific workspace(s).", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "CPUs Requested" + }, + "properties": [ + { + "id": "unit", + "value": "none" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "RAM Requested" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "PVC Capacity" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 20, + "x": 0, + "y": 1.2 + }, + "id": 29, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "center", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/.*/", + "values": false + }, + "showPercentChange": false, + "text": { + "titleSize": 20, + "valueSize": 40 + }, + "textMode": "value_and_name", + "wideLayout": false + }, + "pluginVersion": 
"10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "group by (template_name) (coderd_agents_up{workspace_name=~\"$workspace_name\"})", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "Template Name", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "group by (template_version) (coderd_agents_up{workspace_name=~\"$workspace_name\"})", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "Template Version", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "group by (username) (coderd_agents_up{workspace_name=~\"$workspace_name\"})", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "Owner", + "range": false, + "refId": "C" + } + ], + "title": "Details", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value #A": true, + "Value #B": true, + "Value #C": true, + "Value #D": true + }, + "includeByName": {}, + "indexByName": { + "CPUs Requested": 7, + "PVC Capacity": 9, + "RAM Requested": 8, + "Time": 0, + "Value #A": 5, + "Value #B": 3, + "Value #C": 6, + "template_name": 2, + "template_version": 4, + "username": 1 + }, + "renameByName": { + "Value #C": "", + "lifecycle_state": "Agent State", + "template_name": "Template", + "template_version": "Template Version", + "username": "Owner" + } + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 1.2 + }, + "id": 38, + "links": [ + { + "title": "Provisioners Dashboard", + "url": 
"/d/provisionerd/provisioners?${__url_time_range}" + } + ], + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "Essential information about the selected workspace.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "CPUs Requested" + }, + "properties": [ + { + "id": "unit", + "value": "none" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "RAM Requested" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "PVC Capacity" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 20, + "x": 0, + "y": 5.2 + }, + "id": 36, + "options": { + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ], + "fields": "/.*/" + }, + "orientation": "vertical", + "textMode": "value_and_name", + "wideLayout": false, + "colorMode": "none", + "graphMode": "none", + "justifyMode": "center", + "showPercentChange": false, + "text": { + "titleSize": 20, + "valueSize": 40 + } + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{pod=~\".*$workspace_name.*\", {{ include "workspaces-selector" . 
-}}, resource=\"cpu\"})", + "format": "time_series", + "hide": false, + "instant": true, + "legendFormat": "CPUs Requested", + "range": false, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{pod=~\".*$workspace_name.*\", {{ include "workspaces-selector" . -}}, resource=\"memory\"})", + "format": "time_series", + "hide": false, + "instant": true, + "legendFormat": "RAM Requested", + "range": false, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(\n kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~\".*$workspace_name.*\", {{- include "workspaces-selector" . -}} }\n * on(persistentvolumeclaim) group_right\n group by (persistentvolumeclaim, persistentvolume) (\n label_replace(\n kube_persistentvolume_claim_ref,\n \"persistentvolumeclaim\",\n \"$1\",\n \"name\",\n \"(.+)\"\n )\n )\n * on (persistentvolume)\n kube_persistentvolume_capacity_bytes\n)", + "format": "time_series", + "hide": false, + "instant": true, + "legendFormat": "PVC Capacity", + "range": false, + "refId": "F" + } + ], + "title": "Resources", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value #A": true, + "Value #B": true, + "Value #C": true, + "Value #D": true + }, + "includeByName": {}, + "indexByName": { + "CPUs Requested": 7, + "PVC Capacity": 9, + "RAM Requested": 8, + "Time": 0, + "Value #A": 5, + "Value #B": 3, + "Value #C": 6, + "template_name": 2, + "template_version": 4, + "username": 1 + }, + "renameByName": { + "Value #C": "", + "lifecycle_state": "Agent State", + "template_name": "Template", + "template_version": "Template Version", + "username": "Owner" + } + } + } + ], + "type": "stat", + "description": "" + }, + { + "datasource": { + 
"type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "created": { + "color": "light-blue", + "index": 1, + "text": "Created" + }, + "off": { + "color": "text", + "index": 8, + "text": "Off" + }, + "ready": { + "color": "green", + "index": 0, + "text": "Ready" + }, + "shutdown_error": { + "color": "red", + "index": 7, + "text": "Shutdown Error" + }, + "shutdown_timeout": { + "color": "purple", + "index": 6, + "text": "Shutdown Timeout" + }, + "shutting_down": { + "color": "light-purple", + "index": 5, + "text": "Shutting Down" + }, + "start_error": { + "color": "red", + "index": 4, + "text": "Start Error" + }, + "start_timeout": { + "color": "orange", + "index": 3, + "text": "Start Timeout" + }, + "starting": { + "color": "super-light-green", + "index": 2, + "text": "Starting" + } + }, + "type": "value" + }, + { + "options": { + "match": "empty", + "result": { + "color": "text", + "index": 9, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null", + "result": { + "color": "text", + "index": 10, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 9.2 + }, + "id": 35, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^lifecycle_state$/", + "values": false + }, + "showPercentChange": false, + "text": { + "valueSize": 50 + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max by (lifecycle_state) 
(coderd_agents_connections{workspace_name=~\"$workspace_name\"})", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "D" + } + ], + "title": "Agent Lifecycle State", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "-1": { + "color": "light-orange", + "index": 0, + "text": "Not completed yet" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 60 + }, + { + "color": "red", + "value": 120 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 4, + "y": 9.2 + }, + "id": 33, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^Value$/", + "values": false + }, + "showPercentChange": false, + "text": { + "valueSize": 50 + }, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(coderd_agentstats_startup_script_seconds{workspace_name=~\"$workspace_name\"}) or vector(-1)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "C" + } + ], + "title": "Agent Startup Script Execution Time", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + 
"gridPos": { + "h": 6, + "w": 3, + "x": 7, + "y": 9.2 + }, + "id": 39, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/.*/", + "values": false + }, + "showPercentChange": false, + "text": { + "titleSize": 20, + "valueSize": 50 + }, + "textMode": "value_and_name", + "wideLayout": false + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max by (app) (\n label_replace(\n {workspace_name=~\"$workspace_name\", __name__=~\"coderd_agentstats_session_count_.*\"},\n \"app\",\n \"$1\",\n \"__name__\",\n \"coderd_agentstats_session_count_(.*)\"\n )\n)>0", + "format": "time_series", + "hide": false, + "instant": true, + "legendFormat": {{ printf "{{app}}" | quote }}, + "range": false, + "refId": "C" + } + ], + "title": "App Session Counts", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": {} + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*Bytes/" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 10, + "x": 10, + "y": 9.2 + }, + "id": 34, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "center", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": 
"/.*/", + "values": false + }, + "showPercentChange": false, + "text": { + "titleSize": 20, + "valueSize": 50 + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(coderd_agents_connection_latencies_seconds{workspace_name=~\"$workspace_name\"})", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "Connection Latency", + "range": false, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(sum by (pod) (sum_over_time(coderd_agentstats_rx_bytes{workspace_name=~\"$workspace_name\"}[$__range])))", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "Received Bytes", + "range": false, + "refId": "rx" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(sum by (pod) (sum_over_time(coderd_agentstats_tx_bytes{workspace_name=~\"$workspace_name\"}[$__range])))", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "Transmitted Bytes", + "range": false, + "refId": "tx" + } + ], + "title": "Networking", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": { + "Value #A": "Received Bytes", + "Value #B": "Transmitted Bytes", + "Value #C": "Connection Latency", + "Value #rx": "Received Bytes", + "Value #tx": "Transmitted Bytes" + } + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 9.2 + }, + "id": 40, + "links": [ + { + "title": "Provisioners Dashboard", + "url": 
"/d/provisionerd/provisioners?${__url_time_range}" + } + ], + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "Essential information about this workspace's agent.\n\nRead more about the agent [here](https://coder.com/docs/v2/latest/about/architecture#agents).", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "status" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "mappings", + "value": [ + { + "options": { + "failed": { + "color": "orange", + "index": 1, + "text": "Failure" + }, + "success": { + "color": "green", + "index": 0, + "text": "Success" + } + }, + "type": "value" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Workspace Transition" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "mappings", + "value": [ + { + "options": { + "DESTROY": { + "color": "red", + "index": 0 + }, + "START": { + "color": "blue", + "index": 1 + }, + "STOP": { + "color": "purple", + "index": 2 + } + }, + "type": "value" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 20, + "x": 0, + "y": 15.2 + }, + "id": 6, + "interval": "", + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": true, + "fields": [], + "reducer": [ + 
"sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Time" + } + ] + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (workspace_name, workspace_owner, status, template_name, template_version, workspace_transition) (\n # Since new series are created and are initially set to a value of 1, we cannot use \"increase\" (because an increase from to 1 does not yield 1).\n # So we compare the current series to an interval ago to see if we have any new series and then sum the series we find. \n ((\n coderd_workspace_builds_total{workspace_name=~\"$workspace_name\"} - \n coderd_workspace_builds_total{workspace_name=~\"$workspace_name\"} offset $__interval\n ) >= 0) \n or coderd_workspace_builds_total{workspace_name=~\"$workspace_name\"}\n) > 0", + "format": "table", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Build Log", + "transformations": [ + { + "disabled": true, + "id": "groupBy", + "options": { + "fields": { + "Count": { + "aggregations": [ + "sum" + ], + "operation": "aggregate" + }, + "Status": { + "aggregations": [], + "operation": "groupby" + }, + "Template Name": { + "aggregations": [], + "operation": "groupby" + }, + "Template Version": { + "aggregations": [], + "operation": "groupby" + }, + "Total": { + "aggregations": [ + "sum" + ], + "operation": "aggregate" + }, + "Value": { + "aggregations": [ + "sum" + ], + "operation": "aggregate" + }, + "Workspace Name": { + "aggregations": [], + "operation": "groupby" + }, + "Workspace Ownert": { + "aggregations": [], + "operation": "groupby" + }, + "Workspace Transition": { + "aggregations": [], + "operation": "groupby" + }, + "status": { + "aggregations": [], + "operation": "groupby" + }, + "template_name": { + "aggregations": [], + "operation": "groupby" + }, + 
"template_version": { + "aggregations": [], + "operation": "groupby" + }, + "workspace_name": { + "aggregations": [], + "operation": "groupby" + }, + "workspace_owner": { + "aggregations": [], + "operation": "groupby" + }, + "workspace_transition": { + "aggregations": [], + "operation": "groupby" + } + } + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "desc": true, + "field": "Value" + } + ] + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": false + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": { + "Value": "Count", + "Value (sum)": "Total", + "status": "Status", + "template_name": "Template Name", + "template_version": "Template Version", + "workspace_name": "Workspace Name", + "workspace_owner": "Workspace Owner", + "workspace_transition": "Workspace Transition" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 15.2 + }, + "id": 37, + "links": [ + { + "title": "Provisioners Dashboard", + "url": "/d/provisionerd/provisioners?${__url_time_range}" + } + ], + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "This table shows a reverse-chronological log of all workspace builds.\n\nThe \"Count\" field shows the count of events which occurred within a minute, grouped by all columns.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "gridPos": { + "h": 10, + "w": 20, + "x": 0, + "y": 22.2 + }, + "id": 7, + "options": { + "dedupStrategy": "exact", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": true, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "targets": [ + { + "datasource": { + 
"type": "loki", + "uid": "loki" + }, + "editorMode": "code", + "expr": {{ printf "{%s, logger=~\"(.*runner|terraform|provisioner.*)\"} |~ \"$workspace_name\" | line_format `{{ printf \"[\\033[35m\" }}{{.pod}}{{ printf \"\\033[0m]\\t\" }}{{ __line__ }}`" (include "non-workspace-selector" .) | quote }}, + "hide": false, + "queryType": "range", + "refId": "A" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "editorMode": "code", + "expr": {{ printf "{%s, pod=~\".*($workspace_name).*\"} | line_format `{{ printf \"[\\033[32m\" }}{{.pod}}{{ printf \"\\033[0m]\\t\" }}{{ __line__ }}`" (include "workspaces-selector" .) | quote }}, + "hide": false, + "queryType": "range", + "refId": "B" + } + ], + "title": "Logs", + "type": "logs" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 10, + "w": 4, + "x": 20, + "y": 22.2 + }, + "id": 24, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "The logs to the left come both from provisioners and workspace logs.\n\nProvisioner logs matching the name filter are highlighted in magenta, while\nworkspace logs matching the name filter are highlighted in green.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + } + ], + "refresh": "{{- include "dashboard-refresh" . 
-}}", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "allValue": "", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(coderd_agents_up,workspace_name)", + "hide": 0, + "includeAll": false, + "label": "Workspace Name Filter", + "multi": false, + "name": "workspace_name", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(coderd_agents_up,workspace_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-{{- include "dashboard-range" . -}}", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Workspace Detail", + "uid": "workspace-detail", + "version": 9, + "weekStart": "" +} +{{ end }} \ No newline at end of file diff --git a/coder-observability/templates/dashboards/_dashboards_workspaces.json.tpl b/coder-observability/templates/dashboards/_dashboards_workspaces.json.tpl new file mode 100644 index 0000000..afd52d8 --- /dev/null +++ b/coder-observability/templates/dashboards/_dashboards_workspaces.json.tpl @@ -0,0 +1,1626 @@ +{{ define "workspaces-dashboard.json" }} +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "description": "", + "gridPos": { + "h": 1.2, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 28, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "**HINT**: use the dropdowns above to filter by specific workspaces and/or templates.", + "mode": "markdown" + }, + 
"pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1.2 + }, + "id": 31, + "title": "Resources", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 0, + "y": 2.2 + }, + "id": 33, + "options": { + "legend": { + "calcs": [ + "mean", + "stdDev", + "min", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum by (pod) (rate(container_cpu_usage_seconds_total{ {{- include "workspaces-selector" . 
-}} }[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 10, + "y": 2.2 + }, + "id": 37, + "options": { + "legend": { + "calcs": [ + "mean", + "stdDev", + "min", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max by (pod) (container_memory_working_set_bytes{ {{- include "workspaces-selector" . 
-}} })", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "RAM Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 2.2 + }, + "id": 36, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "The cumulative CPU used per core-second. If a workspace was using a full CPU core, that would be represented as 1 second.\n\nSee the Kubernetes [documentation](https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource/#cpu-units) for more details.\n\nThe total memory used by each workspace container is represented; it is the same metric which the [OOM killer](https://www.kernel.org/doc/gorman/html/understand/understand016.html) uses.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, 
+ "gridPos": { + "h": 8, + "w": 10, + "x": 0, + "y": 10.2 + }, + "id": 38, + "options": { + "legend": { + "calcs": [ + "sum" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum by (pod) (\n round(increase(kube_pod_container_status_restarts_total{ {{- include "workspaces-selector" . -}} }[$__interval]))\n) > 0", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Pod Restarts", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 10, + "y": 10.2 + }, + "id": 39, + "options": { + "legend": { + "calcs": [ + "sum" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + 
"mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum by (pod, reason) (\n count_over_time(kube_pod_container_status_terminated_reason{ {{- include "workspaces-selector" . -}} }[$__interval])\n)", + "hide": false, + "instant": false, + "legendFormat": {{ printf "{{pod}}:{{reason}}" | quote }}, + "range": true, + "refId": "B" + } + ], + "title": "Terminations", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 10.2 + }, + "id": 40, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "Pods can be terminated for several reasons:\n- `OOMKilled`: pod exceeded its defined memory limit or was terminated by the OS for using excessive memory (if no limit defined)\n- `Error`: usually attributable to a configuration problem\n- `Evicted`: pod has been evicted from node for overusing resources and will be rescheduled on another node if possible\n\nPod restarts are not necessarily problematic, but they are worth noting.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18.2 + }, + "id": 30, + "panels": [], + "title": "Builds", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, +
"lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "DESTROY" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "STOP" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "START" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 0, + "y": 19.2 + }, + "id": 2, + "interval": "5m", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum by (workspace_transition) (\n (\n # Since new series are created and are initially set to a value of 1, we cannot use \"increase\" (because an increase from to 1 does not yield 1).\n # So we compare the current series to an interval ago to see if we have any new series and then sum the series we find. 
\n (\n coderd_workspace_builds_total{status=\"success\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} - \n coderd_workspace_builds_total{status=\"success\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} offset $__interval\n ) >= 0) \n or coderd_workspace_builds_total{status=\"success\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"}\n) > 0", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Successful Builds by State", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "DESTROY" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "STOP" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "START" + }, + "properties": [ + { + 
"id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 10, + "y": 19.2 + }, + "id": 1, + "interval": "5m", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum by (workspace_transition) (\n (\n # Since new series are created and are initially set to a value of 1, we cannot use \"increase\" (because an increase from to 1 does not yield 1).\n # So we compare the current series to an interval ago to see if we have any new series and then sum the series we find. \n (\n coderd_workspace_builds_total{status=\"failed\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} - \n coderd_workspace_builds_total{status=\"failed\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} offset $__interval\n ) >= 0) \n or coderd_workspace_builds_total{status=\"failed\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"}\n) > 0", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Unsuccessful Builds by State", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 19.2 + }, + "id": 34, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "Workspaces \"transition\" between `STOP`, `START`, and `DESTROY` states.\n\nWorkspaces transition between states when a \"build\" is initiated, which is an execution of `terraform` against the chosen template.\n\nUse the \"Build Count\" table to identify workspace owners which may be struggling with template builds, in order 
to proactively reach out to them with assistance.\n\nConsult the [Template documentation](https://coder.com/docs/v2/latest/templates) for more information.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "status" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "mappings", + "value": [ + { + "options": { + "failed": { + "color": "orange", + "index": 1, + "text": "Failure" + }, + "success": { + "color": "green", + "index": 0, + "text": "Success" + } + }, + "type": "value" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Workspace Transition" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "mappings", + "value": [ + { + "options": { + "DESTROY": { + "color": "red", + "index": 0 + }, + "START": { + "color": "blue", + "index": 1 + }, + "STOP": { + "color": "purple", + "index": 2 + } + }, + "type": "value" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 20, + "x": 0, + "y": 27.2 + }, + "id": 6, + "interval": "", + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": true, + "fields": [], + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Time" + } + ] + }, + "pluginVersion": "10.4.0", + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (workspace_name, workspace_owner, status, template_name, template_version, workspace_transition) (\n # Since new series are created and are initially set to a value of 1, we cannot use \"increase\" (because an increase from to 1 does not yield 1).\n # So we compare the current series to an interval ago to see if we have any new series and then sum the series we find. \n ((\n coderd_workspace_builds_total{workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} - \n coderd_workspace_builds_total{workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} offset $__interval\n ) >= 0) \n or coderd_workspace_builds_total{workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"}\n) > 0", + "format": "table", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Build Log", + "transformations": [ + { + "disabled": true, + "id": "groupBy", + "options": { + "fields": { + "Count": { + "aggregations": [ + "sum" + ], + "operation": "aggregate" + }, + "Status": { + "aggregations": [], + "operation": "groupby" + }, + "Template Name": { + "aggregations": [], + "operation": "groupby" + }, + "Template Version": { + "aggregations": [], + "operation": "groupby" + }, + "Total": { + "aggregations": [ + "sum" + ], + "operation": "aggregate" + }, + "Value": { + "aggregations": [ + "sum" + ], + "operation": "aggregate" + }, + "Workspace Name": { + "aggregations": [], + "operation": "groupby" + }, + "Workspace Ownert": { + "aggregations": [], + "operation": "groupby" + }, + "Workspace Transition": { + "aggregations": [], + "operation": "groupby" + }, + "status": { + "aggregations": [], + "operation": "groupby" + }, + "template_name": { + "aggregations": [], + "operation": "groupby" + }, + "template_version": { + "aggregations": [], + "operation": "groupby" + }, + 
"workspace_name": { + "aggregations": [], + "operation": "groupby" + }, + "workspace_owner": { + "aggregations": [], + "operation": "groupby" + }, + "workspace_transition": { + "aggregations": [], + "operation": "groupby" + } + } + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "desc": true, + "field": "Value" + } + ] + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": false + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": { + "Value": "Count", + "Value (sum)": "Total", + "status": "Status", + "template_name": "Template Name", + "template_version": "Template Version", + "workspace_name": "Workspace Name", + "workspace_owner": "Workspace Owner", + "workspace_transition": "Workspace Transition" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 10, + "w": 4, + "x": 20, + "y": 27.2 + }, + "id": 29, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "This table shows a reverse-chronological log of all workspace builds.\n\nThe \"Count\" field shows the count of events which occurred within a minute, grouped by all columns.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 0, + "y": 37.2 + }, + "id": 8, + "interval": "1h", + "options": { + "displayLabels": [ + "name" + ], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "pie", + 
"reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count by (workspace_owner) (coderd_workspace_latest_build_status{template_name=~\"$template_name\"})", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Workspace by User", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 5, + "y": 37.2 + }, + "id": 9, + "interval": "1h", + "options": { + "displayLabels": [ + "name" + ], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count by (workspace_owner, template_name) (coderd_workspace_latest_build_status{template_name=~\"$template_name\"})", + "instant": true, + "legendFormat": {{ printf "{{workspace_owner}}:{{template_name}}" | quote }}, + "range": false, + "refId": "A" + } + ], + "title": "Workspace by User/Template", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": 
false + } + }, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 10, + "y": 37.2 + }, + "id": 4, + "interval": "1h", + "options": { + "displayLabels": [ + "name" + ], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count by (template_name) (coderd_workspace_latest_build_status{template_name=~\"$template_name\"})", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Template Usage", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 15, + "y": 37.2 + }, + "id": 5, + "interval": "1h", + "options": { + "displayLabels": [], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count by (template_name, template_version) (coderd_workspace_latest_build_status{template_name=~\"$template_name\"})", + "instant": true, + "legendFormat": {{ printf "{{template_name}}:{{template_version}}" | 
quote }}, + "range": false, + "refId": "A" + } + ], + "title": "Template Version Usage", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 37.2 + }, + "id": 24, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "These charts show the distribution of workspaces and templates.\n\nUse these charts to identify which users have outdated templates, and which templates are the most/least popular in your organisation.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 44.2 + }, + "id": 32, + "panels": [], + "title": "Logs", + "type": "row" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "gridPos": { + "h": 10, + "w": 20, + "x": 0, + "y": 45.2 + }, + "id": 7, + "options": { + "dedupStrategy": "exact", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "editorMode": "code", + "expr": "{ {{- include "non-workspace-selector" . 
-}}, logger=~\"(.*runner|terraform|provisioner.*)\"} |~ \"$workspace_name\" or \"$template_name\"", + "queryType": "range", + "refId": "A" + } + ], + "title": "Logs", + "type": "logs" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 10, + "w": 4, + "x": 20, + "y": 45.2 + }, + "id": 22, + "links": [ + { + "title": "Provisioners Dashboard", + "url": "/d/provisionerd/provisioners?${__url_time_range}" + } + ], + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "These are the logs produced by the [Provisioners](/d/provisionerd/provisioners?${__url_time_range}).\n\nUse the dropdowns at the top to filter the logs down to a specific workspace and/or template.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + } + ], + "refresh": "{{- include "dashboard-refresh" . -}}", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "allValue": "", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(coderd_workspace_builds_total,workspace_name)", + "hide": 0, + "includeAll": true, + "label": "Workspace Name Filter", + "multi": true, + "name": "workspace_name", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(coderd_workspace_builds_total,workspace_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "allValue": "", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(coderd_workspace_builds_total,template_name)", + "hide": 0, + "includeAll": true, + "label": "Template 
Name Filter", + "multi": true, + "name": "template_name", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(coderd_workspace_builds_total,template_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-{{- include "dashboard-range" . -}}", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Workspaces", + "uid": "workspaces", + "version": 2, + "weekStart": "" +} +{{ end }} \ No newline at end of file diff --git a/coder-observability/templates/dashboards/configmap-dashboards-coderd.yaml b/coder-observability/templates/dashboards/configmap-dashboards-coderd.yaml new file mode 100644 index 0000000..33719f5 --- /dev/null +++ b/coder-observability/templates/dashboards/configmap-dashboards-coderd.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: dashboards-coderd + namespace: {{ .Release.Namespace }} +data: + coderd.json: |- {{- include "coderd-dashboard.json" . | trim | nindent 4 }} \ No newline at end of file diff --git a/coder-observability/templates/dashboards/configmap-dashboards-prebuilds.yaml b/coder-observability/templates/dashboards/configmap-dashboards-prebuilds.yaml new file mode 100644 index 0000000..14d5908 --- /dev/null +++ b/coder-observability/templates/dashboards/configmap-dashboards-prebuilds.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: dashboards-prebuilds + namespace: {{ .Release.Namespace }} +data: + prebuilds.json: |- {{- include "prebuilds-dashboard.json" . 
| trim | nindent 4 }} \ No newline at end of file diff --git a/coder-observability/templates/dashboards/configmap-dashboards-provisionerd.yaml b/coder-observability/templates/dashboards/configmap-dashboards-provisionerd.yaml new file mode 100644 index 0000000..0c20e83 --- /dev/null +++ b/coder-observability/templates/dashboards/configmap-dashboards-provisionerd.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: dashboards-provisionerd + namespace: {{ .Release.Namespace }} +data: + provisionerd.json: |- {{- include "provisionerd-dashboard.json" . | trim | nindent 4 }} \ No newline at end of file diff --git a/coder-observability/templates/dashboards/configmap-dashboards-status.yaml b/coder-observability/templates/dashboards/configmap-dashboards-status.yaml new file mode 100644 index 0000000..e307cc5 --- /dev/null +++ b/coder-observability/templates/dashboards/configmap-dashboards-status.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: dashboards-status + namespace: {{ .Release.Namespace }} +data: + status.json: |- {{- include "status-dashboard.json" . | trim | nindent 4 }} \ No newline at end of file diff --git a/coder-observability/templates/dashboards/configmap-dashboards-workspace_detail.yaml b/coder-observability/templates/dashboards/configmap-dashboards-workspace_detail.yaml new file mode 100644 index 0000000..084c5e1 --- /dev/null +++ b/coder-observability/templates/dashboards/configmap-dashboards-workspace_detail.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: dashboards-workspace-detail + namespace: {{ .Release.Namespace }} +data: + workspaces-detail.json: |- {{- include "workspace-detail-dashboard.json" . 
| trim | nindent 4 }} \ No newline at end of file diff --git a/coder-observability/templates/dashboards/configmap-dashboards-workspaces.yaml b/coder-observability/templates/dashboards/configmap-dashboards-workspaces.yaml new file mode 100644 index 0000000..bae657d --- /dev/null +++ b/coder-observability/templates/dashboards/configmap-dashboards-workspaces.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: dashboards-workspaces + namespace: {{ .Release.Namespace }} +data: + workspaces.json: |- {{- include "workspaces-dashboard.json" . | trim | nindent 4 }} \ No newline at end of file diff --git a/coder-observability/templates/service-runbook-viewer.yaml b/coder-observability/templates/service-runbook-viewer.yaml new file mode 100644 index 0000000..68c210a --- /dev/null +++ b/coder-observability/templates/service-runbook-viewer.yaml @@ -0,0 +1,12 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: runbook-viewer +spec: + ports: + - port: 80 + targetPort: 3000 + protocol: TCP + selector: + app: runbook-viewer diff --git a/coder-observability/templates/statefulset-postgres-exporter.yaml b/coder-observability/templates/statefulset-postgres-exporter.yaml index 0126e50..a1f6e55 100644 --- a/coder-observability/templates/statefulset-postgres-exporter.yaml +++ b/coder-observability/templates/statefulset-postgres-exporter.yaml @@ -3,6 +3,7 @@ apiVersion: apps/v1 kind: StatefulSet metadata: name: postgres-exporter + namespace: {{ .Release.Namespace }} spec: selector: matchLabels: @@ -15,10 +16,11 @@ spec: prometheus.io/scrape: 'true' labels: app: postgres-exporter + app.kubernetes.io/name: "database-stats" spec: containers: - - name: exporter - image: quay.io/prometheuscommunity/postgres-exporter + - name: postgres-exporter + image: {{ .Values.global.postgres.exporter.image }} args: - --collector.long_running_transactions ports: @@ -27,3 +29,12 @@ spec: env: - name: DATA_SOURCE_NAME value: '{{ include "postgres-connector-string" . 
}}' + {{ include "postgres-secret-mount" . | nindent 10 }} + {{- if .Values.global.postgres.volumeMounts }} + volumeMounts: + {{ toYaml .Values.global.postgres.volumeMounts | nindent 12 }} + {{- end }} + {{- if .Values.global.postgres.volumes }} + volumes: + {{ toYaml .Values.global.postgres.volumes | nindent 8 }} + {{- end }} \ No newline at end of file diff --git a/coder-observability/templates/statefulset-runbook-viewer.yaml b/coder-observability/templates/statefulset-runbook-viewer.yaml new file mode 100644 index 0000000..64f50e4 --- /dev/null +++ b/coder-observability/templates/statefulset-runbook-viewer.yaml @@ -0,0 +1,34 @@ +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: runbook-viewer + namespace: {{ .Release.Namespace }} +spec: + selector: + matchLabels: + app: runbook-viewer + serviceName: runbook-viewer + replicas: 1 + template: + metadata: + annotations: + checksum/config: {{ (.Files.Glob "runbooks/**").AsConfig | indent 2 | sha256sum }} + labels: + app: runbook-viewer + spec: + containers: + - name: madness + image: {{ .Values.runbookViewer.image }} + ports: + - containerPort: 3000 + name: madness + args: + - server + volumeMounts: + - mountPath: /docs/ + name: runbooks + volumes: + - name: runbooks + configMap: + name: runbooks diff --git a/coder-observability/templates/statefulset-sql-exporter.yaml b/coder-observability/templates/statefulset-sql-exporter.yaml index fdbbc47..628339e 100644 --- a/coder-observability/templates/statefulset-sql-exporter.yaml +++ b/coder-observability/templates/statefulset-sql-exporter.yaml @@ -3,6 +3,7 @@ apiVersion: apps/v1 kind: StatefulSet metadata: name: sql-exporter + namespace: {{ .Release.Namespace }} spec: selector: matchLabels: @@ -16,10 +17,11 @@ spec: checksum/config: {{ include (print $.Template.BasePath "/configmap-sql-exporter.yaml") . 
| sha256sum }} labels: app: sql-exporter + app.kubernetes.io/name: "database-stats" spec: containers: - - name: exporter - image: burningalchemist/sql_exporter + - name: sql-exporter + image: {{ .Values.sqlExporter.image }} args: - -config.file=/cfg/config.yaml ports: @@ -28,6 +30,7 @@ spec: volumeMounts: - mountPath: /cfg/ name: config + {{ include "postgres-secret-mount" . | nindent 10 }} volumes: - name: config configMap: diff --git a/coder-observability/values.yaml b/coder-observability/values.yaml index 3f44586..6d06981 100644 --- a/coder-observability/values.yaml +++ b/coder-observability/values.yaml @@ -1,18 +1,141 @@ -fullnameOverride: null - global: + coder: + # global.coder.scrapeMetrics -- use this to scrape metrics from a standalone (set of) coder deployment(s) + # if using kubernetes, rather add an annotation "prometheus.io/scrape=true" and coder will get automatically scraped; + # set this value to null and configure coderdSelector to target your coder pods + scrapeMetrics: null +# hostname: localhost +# port: 2112 +# scrapeInterval: 15s +# additionalLabels: +# job: coder + # global.coder.coderdSelector -- series selector for Prometheus/Loki to locate coderd (control plane) pods. + # ensure this uses backticks for quotes! + coderdSelector: 'pod=~`coder.*`, pod!~`.*provisioner.*`' + # global.coder.provisionerdSelector -- series selector for Prometheus/Loki to locate provisioner pods. + # https://coder.com/docs/v2/latest/admin/provisioners + # TODO: rename container label in provisioner helm chart to be "provisioner" not "coder" + # ensure this uses backticks for quotes! + provisionerdSelector: 'pod=~`coder-provisioner.*`' + # global.coder.workspacesSelector -- series selector for Prometheus/Loki to locate workspace pods.
+ controlPlaneNamespace: coder + # global.coder.externalProvisionersNamespace -- the namespace into which any external provisioners have been deployed. + externalProvisionersNamespace: coder + # See https://coder.com/docs/v2/latest/cli/server#--log-human + # "Human" format is the default, which is a combination of plaintext and logfmt but it's quite tricky to parse reliably + # with regex matchers. + # TODO: support "json" format + logFormat: human + # global.coder.alerts -- alerts for the various aspects of Coder + alerts: + enterprise: + groups: + Licences: + enabled: true + delay: 1m + thresholds: + warning: 0.9 + critical: 1 + coderd: + groups: + CPU: + enabled: true + delay: 10m + period: 10m + thresholds: + warning: 0.8 + critical: 0.9 + Memory: + enabled: true + delay: 10m + thresholds: + warning: 0.8 + critical: 0.9 + Restarts: + enabled: true + delay: 1m + period: 10m + thresholds: + notify: 1 + warning: 2 + critical: 3 + Replicas: + enabled: true + delay: 5m + thresholds: + notify: 3 # 2/3 replicas are alive + warning: 2 # 1/3 replicas are alive + critical: 1 # 0/3 replicas are alive + WorkspaceBuildFailures: + enabled: true + delay: 10m + period: 10m + thresholds: + notify: 2 + warning: 5 + critical: 10 + IneligiblePrebuilds: + enabled: true + delay: 10m + thresholds: + notify: 1 + UnprovisionedPrebuiltWorkspaces: + enabled: true + delay: 10m + thresholds: + warn: 1 + provisionerd: + groups: + Replicas: + enabled: true + delay: 5m + thresholds: + notify: 3 # 2/3 replicas are alive + warning: 2 # 1/3 replicas are alive + critical: 1 # 0/3 replicas are alive + zone: svc - # these settings are global so we can parameterise some values which get rendered by subcharts + externalScheme: http + # The external hostname from which k8s services can be accessed in the form of: + # <externalScheme>://<service>.<namespace>.<externalZone> + # e.g.
+ # http://dashboards.coder-observability.svc.cluster.local + externalZone: svc.cluster.local + + # global.telemetry -- control telemetry collection + telemetry: + # global.telemetry.metrics -- control metric collection + metrics: + # global.telemetry.metrics.scrape_interval -- how often the collector will scrape discovered pods + scrape_interval: 15s + # global.telemetry.metrics.scrape_timeout -- how long a request will be allowed to wait before being canceled + scrape_timeout: 12s + + # global.postgres -- postgres connection information + # NOTE: these settings are global so we can parameterise some values which get rendered by subcharts postgres: - # Credentials suggested in https://coder.com/docs/v2/latest/install/database by default hostname: localhost port: 5432 username: coder - password: secret42 # TODO make secret + password: database: coder sslmode: disable + # SSL root certificate path - only required when sslmode != "disable" + sslrootcert: + # ensure that your secret has a field named `PGPASSWORD` + mountSecret: "secret-postgres" + exporter: + image: "quay.io/prometheuscommunity/postgres-exporter" + + # volumes and volumeMounts for SSL certificates + volumes: [] + volumeMounts: [] + + # global.postgres.alerts -- alerts for postgres alerts: groups: Basic: @@ -25,10 +148,32 @@ global: notify: 0.5 warning: 0.8 critical: 0.9 + Connections: + enabled: true + delay: 5m + thresholds: + notify: 0.5 + warning: 0.8 + critical: 0.9 + + # global.dashboards -- settings for bundled dashboards + dashboards: + # global.dashboards.timerange -- how far back dashboards should look + timerange: 12h + # global.dashboards.refresh -- how often dashboards should refresh + refresh: 30s + # global.dashboards.queryTimeout -- how long (in seconds) before a query in Grafana times out + queryTimeout: 900 -collector: +runbookViewer: + image: "dannyben/madness" + +sqlExporter: + image: "burningalchemist/sql_exporter" + +grafana-agent: enabled: true - fullnameOverride: collector +
fullnameOverride: grafana-agent agent: mode: flow configMap: @@ -49,9 +194,97 @@ collector: crds: create: false -dashboards: + withOTLPReceiver: false + + # Configuration blocks + # + # Enable debug logging (warning: produces large amount of logs!) + #logging: |- + # logging { + # level = "debug" + # format = "logfmt" + # } + discovery: |- + // Discover k8s nodes + discovery.kubernetes "nodes" { + role = "node" + } + + // Discover k8s pods + discovery.kubernetes "pods" { + role = "pod" + selectors { + role = "pod" + } + } + commonRelabellings: |- + rule { + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + rule { + source_labels = ["__meta_kubernetes_pod_name"] + target_label = "pod" + } + // coalesce the following labels and pick the first value; we'll use this to define the "job" label + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_component", "app", "__meta_kubernetes_pod_container_name"] + separator = "/" + target_label = "__meta_app" + action = "replace" + regex = "^/*([^/]+?)(?:/.*)?$" // split by the delimiter if it exists, we only want the first one + replacement = "${1}" + } + rule { + source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_app"] + separator = "/" + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + target_label = "container" + } + rule { + regex = "__meta_kubernetes_pod_label_(statefulset_kubernetes_io_pod_name|controller_revision_hash)" + action = "labeldrop" + } + rule { + regex = "pod_template_generation" + action = "labeldrop" + } + rule { + source_labels = ["__meta_kubernetes_pod_phase"] + regex = "Pending|Succeeded|Failed|Completed" + action = "drop" + } + rule { + source_labels = ["__meta_kubernetes_pod_node_name"] + action = "replace" + target_label = "node" + } + rule { + action = "labelmap" + regex = "__meta_kubernetes_pod_annotation_prometheus_io_param_(.+)" + replacement 
= "__param_$1" + } + extraBlocks: "" + # Examples: + # loki.source.file "tmpfiles" { + # targets = [ + # {__path__ = "/tmp/foo.txt", "color" = "pink"}, + # {__path__ = "/tmp/bar.txt", "color" = "blue"}, + # {__path__ = "/tmp/baz.txt", "color" = "grey"}, + # ] + # forward_to = [loki.write.loki.receiver] + # } + podMetricsRelabelRules: "" + podLogsRelabelRules: "" + +grafana: enabled: true - fullnameOverride: dashboards + image: + tag: 10.4.19 + fullnameOverride: grafana useStatefulSet: true replicas: 1 deploymentStrategy: @@ -67,7 +300,7 @@ dashboards: # TODO: this adds annotations to _all_ resources; can we be more specific? prometheus.io/scrape: "true" dashboardProviders: - node_exporter.yaml: + infra.yaml: apiVersion: 1 providers: - name: infra @@ -78,10 +311,27 @@ dashboards: editable: false options: path: /var/lib/grafana/dashboards/infra + coder.yaml: + apiVersion: 1 + providers: + - name: coder + orgId: 1 + folder: 'Coder' + type: file + updateIntervalSeconds: 5 + disableDeletion: false + editable: false + options: + path: /var/lib/grafana/dashboards/coder + sidecar.yaml: + apiVersion: 1 + providers: - name: sidecar orgId: 1 type: file + folder: 'Other' disableDeletion: false + updateIntervalSeconds: 30 editable: false options: path: /tmp/dashboards @@ -102,31 +352,44 @@ dashboards: datasources: - name: metrics type: prometheus - url: http://metrics.{{ .Release.Namespace }}.{{ $.Values.global.zone }} + url: http://prometheus.{{ .Release.Namespace }}.{{ $.Values.global.zone }} access: proxy isDefault: true editable: false + # add 5s on global timeout to distinguish between Grafana timeout & datasource timeout + timeout: '{{ add $.Values.global.dashboards.queryTimeout 5 }}' + uid: prometheus - name: logs type: loki - url: http://logs-gateway.{{ .Release.Namespace }}.{{ $.Values.global.zone }} + url: http://loki-gateway.{{ .Release.Namespace }}.{{ $.Values.global.zone }} access: proxy isDefault: false editable: false + # add 5s on global timeout to distinguish 
between Grafana timeout & datasource timeout + timeout: '{{ add $.Values.global.dashboards.queryTimeout 5 }}' + uid: loki - name: postgres type: postgres url: '{{ .Values.global.postgres.hostname }}:{{ .Values.global.postgres.port }}' user: '{{ .Values.global.postgres.username }}' secureJsonData: - password: '{{ .Values.global.postgres.password }}' + password: '{{ if .Values.global.postgres.password }}{{ .Values.global.postgres.password }}{{ else }}$PGPASSWORD{{ end }}' jsonData: sslmode: '{{ .Values.global.postgres.sslmode }}' isDefault: false editable: false + # add 5s on global timeout to distinguish between Grafana timeout & datasource timeout + timeout: '{{ add $.Values.global.dashboards.queryTimeout 5 }}' + uid: postgres admin: - existingSecret: grafana-admin - userKey: username - passwordKey: password + existingSecret: "" + env: + GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION: true grafana.ini: + auth.anonymous: + enabled: true + org_name: Main Org. + org_role: Admin analytics: reporting_enabled: false users: @@ -135,25 +398,60 @@ dashboards: # migrate Angular panels to React # see https://grafana.com/docs/grafana/latest/developers/angular_deprecation/angular-plugins/#automatic-migration-of-plugins autoMigrateOldPanels: true + dashboards: + # mounted configmap will be synced with sidecar + default_home_dashboard_path: /var/lib/grafana/dashboards/coder/0/status.json + dataproxy: + timeout: '{{ $.Values.global.dashboards.queryTimeout }}' sidecar: dashboards: provider: disableDelete: true allowUiUpdates: true - enabled: true + enabled: false labelValue: "1" + extraConfigmapMounts: + # we can't combine configmaps because of the 1MiB size limit, but Grafana will scan + # the /var/lib/grafana/dashboards/coder directory deeply to find dashboards + - name: dashboards-status + mountPath: /var/lib/grafana/dashboards/coder/0 + configMap: dashboards-status + readOnly: false + - name: dashboards-coderd + mountPath: /var/lib/grafana/dashboards/coder/1 + configMap: 
dashboards-coderd + readOnly: false + - name: dashboards-provisionerd + mountPath: /var/lib/grafana/dashboards/coder/2 + configMap: dashboards-provisionerd + readOnly: false + - name: dashboards-workspaces + mountPath: /var/lib/grafana/dashboards/coder/3 + configMap: dashboards-workspaces + readOnly: false + - name: dashboards-workspace-detail + mountPath: /var/lib/grafana/dashboards/coder/4 + configMap: dashboards-workspace-detail + readOnly: false + - name: dashboards-prebuilds + mountPath: /var/lib/grafana/dashboards/coder/5 + configMap: dashboards-prebuilds + readOnly: false -metrics: +prometheus: enabled: true server: - fullnameOverride: metrics + fullnameOverride: prometheus podAnnotations: prometheus.io/scrape: "true" global: - scrape_interval: 15s + # prometheus.server.evaluation_interval -- how often to evaluate recording & alerting rule groups + evaluation_interval: 30s + extraArgs: log.level: debug + replicaCount: 1 statefulSet: enabled: true @@ -177,14 +475,27 @@ metrics: serverFiles: prometheus.yml: # disables scraping of metrics by the Prometheus helm chart since this is managed by the collector - scrape_configs: + scrape_configs: [] # use custom rule files to be able to render templates (can't do that in values.yaml, unless that value is evaluated by a tpl call) rule_files: - - /etc/config/alerts/postgres.yaml + - /etc/config/alerts/*.yaml testFramework: enabled: false + # enable metric collection from configmap reloader + configmapReload: + prometheus: + extraArgs: + log-level: all + watch-interval: 15s + containerPort: 9091 + extraConfigmapMounts: + - name: alerts + mountPath: /etc/config/alerts + configMap: metrics-alerts + readonly: true + alertmanager: fullnameOverride: alertmanager enabled: true @@ -206,10 +517,11 @@ metrics: # Disable push gateway prometheus-pushgateway: enabled: false -logs: + +loki: enabled: true - nameOverride: logs - fullnameOverride: logs + nameOverride: loki + fullnameOverride: loki enterprise: enabled: false @@ -218,20 
+530,20 @@ logs: useExternalLicense: false test: - canaryServiceAddress: "http://logs-canary:3500/metrics" + canaryServiceAddress: "http://loki-canary:3500/metrics" enabled: true minio: enabled: true - fullnameOverride: logs-storage - address: logs-storage.{{ .Release.Namespace }}.{{ .Values.global.zone}}:9000 + fullnameOverride: loki-storage + address: loki-storage.{{ .Release.Namespace }}.{{ .Values.global.zone}}:9000 podAnnotations: prometheus.io/scrape: "true" prometheus.io/path: "/minio/v2/metrics/cluster" + podLabels: + app.kubernetes.io/name: "loki-storage" loki: - podAnnotations: - prometheus.io/scrape: "true" auth_enabled: false commonConfig: path_prefix: /var/loki @@ -252,7 +564,7 @@ logs: clients: # "fake" is the default username when auth is disabled (unfortunate, I know) fake: - url: http://metrics.{{ .Release.Namespace }}.{{ .Values.global.zone}}/api/v1/write + url: http://prometheus.{{ .Release.Namespace }}.{{ .Values.global.zone}}/api/v1/write headers: Source: Loki remote_timeout: 30s @@ -298,12 +610,18 @@ logs: gateway: replicas: 1 write: + podAnnotations: + prometheus.io/scrape: "true" replicas: 1 extraArgs: - -log.level=debug read: + podAnnotations: + prometheus.io/scrape: "true" replicas: 1 backend: + podAnnotations: + prometheus.io/scrape: "true" replicas: 1 extraVolumes: - name: ruler-wal @@ -312,4 +630,4 @@ logs: - name: ruler-wal mountPath: /var/loki-ruler-wal extraArgs: - - -log.level=debug \ No newline at end of file + - -log.level=debug diff --git a/compiled/resources.yaml b/compiled/resources.yaml new file mode 100644 index 0000000..aff5679 --- /dev/null +++ b/compiled/resources.yaml @@ -0,0 +1,12418 @@ +--- +# Source: coder-observability/charts/loki/templates/chunks-cache/poddisruptionbudget-chunks-cache.yaml +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: loki-memcached-chunks-cache + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + 
app.kubernetes.io/component: memcached-chunks-cache +spec: + selector: + matchLabels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: memcached-chunks-cache + maxUnavailable: 1 +--- +# Source: coder-observability/charts/loki/templates/results-cache/poddisruptionbudget-results-cache.yaml +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: loki-memcached-results-cache + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: memcached-results-cache +spec: + selector: + matchLabels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: memcached-results-cache + maxUnavailable: 1 +--- +# Source: coder-observability/charts/grafana-agent/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: grafana-agent + namespace: coder-observability + labels: + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm +--- +# Source: coder-observability/charts/grafana/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +automountServiceAccountToken: false +metadata: + labels: + app.kubernetes.io/name: grafana + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + name: grafana + namespace: coder-observability +--- +# Source: coder-observability/charts/loki/charts/minio/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: "minio-sa" + namespace: "coder-observability" +--- +# Source: coder-observability/charts/loki/templates/loki-canary/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: loki-canary + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + 
app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: canary + annotations: + prometheus.io/scrape: "true" +automountServiceAccountToken: true +--- +# Source: coder-observability/charts/loki/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: loki + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm +automountServiceAccountToken: true +--- +# Source: coder-observability/charts/prometheus/charts/alertmanager/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: alertmanager + labels: + app.kubernetes.io/name: alertmanager + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + namespace: coder-observability +automountServiceAccountToken: true +--- +# Source: coder-observability/charts/prometheus/charts/kube-state-metrics/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +automountServiceAccountToken: true +metadata: + labels: + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: coder-observability + name: kube-state-metrics + namespace: coder-observability +--- +# Source: coder-observability/charts/prometheus/charts/prometheus-node-exporter/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: node-exporter + namespace: coder-observability + labels: + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: prometheus-node-exporter + app.kubernetes.io/name: prometheus-node-exporter + app.kubernetes.io/instance: coder-observability +automountServiceAccountToken: false +--- +# Source: coder-observability/charts/prometheus/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + 
app.kubernetes.io/component: server + app.kubernetes.io/name: prometheus + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: prometheus + name: prometheus + namespace: coder-observability + annotations: {} +--- +# Source: coder-observability/charts/loki/charts/minio/templates/secrets.yaml +apiVersion: v1 +kind: Secret +metadata: + name: loki-storage + namespace: "coder-observability" + labels: + app: minio + chart: minio-4.0.15 + release: coder-observability + heritage: Helm +type: Opaque +data: + rootUser: "ZW50ZXJwcmlzZS1sb2dz" + rootPassword: "c3VwZXJzZWNyZXQ=" +--- +# Source: coder-observability/charts/grafana/templates/configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana + namespace: coder-observability + labels: + app.kubernetes.io/name: grafana + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + annotations: + prometheus.io/scrape: "true" +data: + grafana.ini: | + [analytics] + check_for_updates = true + reporting_enabled = false + [auth.anonymous] + enabled = true + org_name = Main Org. 
+ org_role = Admin + [dashboards] + default_home_dashboard_path = /var/lib/grafana/dashboards/coder/0/status.json + [dataproxy] + timeout = 900 + [feature_toggles] + autoMigrateOldPanels = true + [grafana_net] + url = https://grafana.net + [log] + mode = console + [paths] + data = /var/lib/grafana/ + logs = /var/log/grafana + plugins = /var/lib/grafana/plugins + provisioning = /etc/grafana/provisioning + [server] + domain = '' + [users] + allow_sign_up = false + datasources.yaml: | + apiVersion: 1 + datasources: + - access: proxy + editable: false + isDefault: true + name: metrics + timeout: '905' + type: prometheus + uid: prometheus + url: http://prometheus.coder-observability.svc + - access: proxy + editable: false + isDefault: false + name: logs + timeout: '905' + type: loki + uid: loki + url: http://loki-gateway.coder-observability.svc + - editable: false + isDefault: false + jsonData: + sslmode: 'disable' + name: postgres + secureJsonData: + password: '$PGPASSWORD' + timeout: '905' + type: postgres + uid: postgres + url: 'localhost:5432' + user: 'coder' + coder.yaml: | + apiVersion: 1 + providers: + - disableDeletion: false + editable: false + folder: Coder + name: coder + options: + path: /var/lib/grafana/dashboards/coder + orgId: 1 + type: file + updateIntervalSeconds: 5 + infra.yaml: | + apiVersion: 1 + providers: + - disableDeletion: false + editable: false + folder: Infrastructure + name: infra + options: + path: /var/lib/grafana/dashboards/infra + orgId: 1 + type: file + sidecar.yaml: | + apiVersion: 1 + providers: + - disableDeletion: false + editable: false + folder: Other + name: sidecar + options: + path: /tmp/dashboards + orgId: 1 + type: file + updateIntervalSeconds: 30 + download_dashboards.sh: "#!/usr/bin/env sh\nset -euf\nmkdir -p /var/lib/grafana/dashboards/coder\nmkdir -p /var/lib/grafana/dashboards/infra\nmkdir -p /tmp/dashboards\n\ncurl -skf \\\n--connect-timeout 60 \\\n--max-time 60 \\\n-H \"Accept: application/json\" \\\n-H \"Content-Type: 
application/json;charset=UTF-8\" \\\n \"https://grafana.com/api/dashboards/1860/revisions/36/download\" \\\n | sed '/-- .* --/! s/\"datasource\":.*,/\"datasource\": \"metrics\",/g' \\\n> \"/var/lib/grafana/dashboards/infra/node-exporter-full.json\"\n \ncurl -skf \\\n--connect-timeout 60 \\\n--max-time 60 \\\n-H \"Accept: application/json\" \\\n-H \"Content-Type: application/json;charset=UTF-8\" \\\n \"https://grafana.com/api/dashboards/9628/revisions/7/download\" \\\n | sed '/-- .* --/! s/\"datasource\":.*,/\"datasource\": \"metrics\",/g' \\\n> \"/var/lib/grafana/dashboards/infra/postgres-database.json\"\n" +--- +# Source: coder-observability/charts/grafana/templates/dashboards-json-configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboards-infra + namespace: coder-observability + labels: + app.kubernetes.io/name: grafana + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + dashboard-provider: infra +data: {} +--- +# Source: coder-observability/charts/loki/charts/minio/templates/configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: loki-storage + namespace: "coder-observability" + labels: + app: minio + chart: minio-4.0.15 + release: coder-observability + heritage: Helm +data: + initialize: "#!/bin/sh\nset -e ; # Have script exit in the event of a failed command.\nMC_CONFIG_DIR=\"/etc/minio/mc/\"\nMC=\"/usr/bin/mc --insecure --config-dir ${MC_CONFIG_DIR}\"\n\n# connectToMinio\n# Use a check-sleep-check loop to wait for MinIO service to be available\nconnectToMinio() {\n SCHEME=$1\n ATTEMPTS=0 ; LIMIT=29 ; # Allow 30 attempts\n set -e ; # fail if we can't read the keys.\n ACCESS=$(cat /config/rootUser) ; SECRET=$(cat /config/rootPassword) ;\n set +e ; # The connections to minio are allowed to fail.\n echo \"Connecting to MinIO server: $SCHEME://$MINIO_ENDPOINT:$MINIO_PORT\" ;\n MC_COMMAND=\"${MC} alias set myminio $SCHEME://$MINIO_ENDPOINT:$MINIO_PORT $ACCESS $SECRET\" ;\n $MC_COMMAND ;\n 
STATUS=$? ;\n until [ $STATUS = 0 ]\n do\n ATTEMPTS=`expr $ATTEMPTS + 1` ;\n echo \\\"Failed attempts: $ATTEMPTS\\\" ;\n if [ $ATTEMPTS -gt $LIMIT ]; then\n exit 1 ;\n fi ;\n sleep 2 ; # 1 second intervals between attempts\n $MC_COMMAND ;\n STATUS=$? ;\n done ;\n set -e ; # reset `e` as active\n return 0\n}\n\n# checkBucketExists ($bucket)\n# Check if the bucket exists, by using the exit code of `mc ls`\ncheckBucketExists() {\n BUCKET=$1\n CMD=$(${MC} ls myminio/$BUCKET > /dev/null 2>&1)\n return $?\n}\n\n# createBucket ($bucket, $policy, $purge)\n# Ensure bucket exists, purging if asked to\ncreateBucket() {\n BUCKET=$1\n POLICY=$2\n PURGE=$3\n VERSIONING=$4\n OBJECTLOCKING=$5\n\n # Purge the bucket, if set & exists\n # Since PURGE is user input, check explicitly for `true`\n if [ $PURGE = true ]; then\n if checkBucketExists $BUCKET ; then\n echo \"Purging bucket '$BUCKET'.\"\n set +e ; # don't exit if this fails\n ${MC} rm -r --force myminio/$BUCKET\n set -e ; # reset `e` as active\n else\n echo \"Bucket '$BUCKET' does not exist, skipping purge.\"\n fi\n fi\n\n# Create the bucket if it does not exist and set objectlocking if enabled (NOTE: versioning will be not changed if OBJECTLOCKING is set because it enables versioning to the Buckets created)\nif ! checkBucketExists $BUCKET ; then\n if [ ! -z $OBJECTLOCKING ] ; then\n if [ $OBJECTLOCKING = true ] ; then\n echo \"Creating bucket with OBJECTLOCKING '$BUCKET'\"\n ${MC} mb --with-lock myminio/$BUCKET\n elif [ $OBJECTLOCKING = false ] ; then\n echo \"Creating bucket '$BUCKET'\"\n ${MC} mb myminio/$BUCKET\n fi\n elif [ -z $OBJECTLOCKING ] ; then\n echo \"Creating bucket '$BUCKET'\"\n ${MC} mb myminio/$BUCKET\n else\n echo \"Bucket '$BUCKET' already exists.\" \n fi\n fi\n\n\n # set versioning for bucket if objectlocking is disabled or not set\n if [ -z $OBJECTLOCKING ] ; then\n if [ ! 
-z $VERSIONING ] ; then\n if [ $VERSIONING = true ] ; then\n echo \"Enabling versioning for '$BUCKET'\"\n ${MC} version enable myminio/$BUCKET\n elif [ $VERSIONING = false ] ; then\n echo \"Suspending versioning for '$BUCKET'\"\n ${MC} version suspend myminio/$BUCKET\n fi\n fi\n else\n echo \"Bucket '$BUCKET' versioning unchanged.\"\n fi\n\n\n # At this point, the bucket should exist, skip checking for existence\n # Set policy on the bucket\n echo \"Setting policy of bucket '$BUCKET' to '$POLICY'.\"\n ${MC} policy set $POLICY myminio/$BUCKET\n}\n\n# Try connecting to MinIO instance\nscheme=http\nconnectToMinio $scheme\n\n\n\n# Create the buckets\ncreateBucket chunks none false \ncreateBucket ruler none false \ncreateBucket admin none false " + add-user: |- + #!/bin/sh + set -e ; # Have script exit in the event of a failed command. + MC_CONFIG_DIR="/etc/minio/mc/" + MC="/usr/bin/mc --insecure --config-dir ${MC_CONFIG_DIR}" + + # AccessKey and secretkey credentials file are added to prevent shell execution errors caused by special characters. + # Special characters for example : ',",<,>,{,} + MINIO_ACCESSKEY_SECRETKEY_TMP="/tmp/accessKey_and_secretKey_tmp" + + # connectToMinio + # Use a check-sleep-check loop to wait for MinIO service to be available + connectToMinio() { + SCHEME=$1 + ATTEMPTS=0 ; LIMIT=29 ; # Allow 30 attempts + set -e ; # fail if we can't read the keys. + ACCESS=$(cat /config/rootUser) ; SECRET=$(cat /config/rootPassword) ; + set +e ; # The connections to minio are allowed to fail. + echo "Connecting to MinIO server: $SCHEME://$MINIO_ENDPOINT:$MINIO_PORT" ; + MC_COMMAND="${MC} alias set myminio $SCHEME://$MINIO_ENDPOINT:$MINIO_PORT $ACCESS $SECRET" ; + $MC_COMMAND ; + STATUS=$? ; + until [ $STATUS = 0 ] + do + ATTEMPTS=`expr $ATTEMPTS + 1` ; + echo \"Failed attempts: $ATTEMPTS\" ; + if [ $ATTEMPTS -gt $LIMIT ]; then + exit 1 ; + fi ; + sleep 2 ; # 1 second intervals between attempts + $MC_COMMAND ; + STATUS=$? 
; + done ; + set -e ; # reset `e` as active + return 0 + } + + # checkUserExists () + # Check if the user exists, by using the exit code of `mc admin user info` + checkUserExists() { + CMD=$(${MC} admin user info myminio $(head -1 $MINIO_ACCESSKEY_SECRETKEY_TMP) > /dev/null 2>&1) + return $? + } + + # createUser ($policy) + createUser() { + POLICY=$1 + #check accessKey_and_secretKey_tmp file + if [[ ! -f $MINIO_ACCESSKEY_SECRETKEY_TMP ]];then + echo "credentials file does not exist" + return 1 + fi + if [[ $(cat $MINIO_ACCESSKEY_SECRETKEY_TMP|wc -l) -ne 2 ]];then + echo "credentials file is invalid" + rm -f $MINIO_ACCESSKEY_SECRETKEY_TMP + return 1 + fi + USER=$(head -1 $MINIO_ACCESSKEY_SECRETKEY_TMP) + # Create the user if it does not exist + if ! checkUserExists ; then + echo "Creating user '$USER'" + cat $MINIO_ACCESSKEY_SECRETKEY_TMP | ${MC} admin user add myminio + else + echo "User '$USER' already exists." + fi + #clean up credentials files. + rm -f $MINIO_ACCESSKEY_SECRETKEY_TMP + + # set policy for user + if [ ! -z $POLICY -a $POLICY != " " ] ; then + echo "Adding policy '$POLICY' for '$USER'" + ${MC} admin policy set myminio $POLICY user=$USER + else + echo "User '$USER' has no policy attached." + fi + } + + # Try connecting to MinIO instance + scheme=http + connectToMinio $scheme + + + + # Create the users + echo console > $MINIO_ACCESSKEY_SECRETKEY_TMP + echo console123 >> $MINIO_ACCESSKEY_SECRETKEY_TMP + createUser consoleAdmin + add-policy: |- + #!/bin/sh + set -e ; # Have script exit in the event of a failed command. + MC_CONFIG_DIR="/etc/minio/mc/" + MC="/usr/bin/mc --insecure --config-dir ${MC_CONFIG_DIR}" + + # connectToMinio + # Use a check-sleep-check loop to wait for MinIO service to be available + connectToMinio() { + SCHEME=$1 + ATTEMPTS=0 ; LIMIT=29 ; # Allow 30 attempts + set -e ; # fail if we can't read the keys. 
+ ACCESS=$(cat /config/rootUser) ; SECRET=$(cat /config/rootPassword) ; + set +e ; # The connections to minio are allowed to fail. + echo "Connecting to MinIO server: $SCHEME://$MINIO_ENDPOINT:$MINIO_PORT" ; + MC_COMMAND="${MC} alias set myminio $SCHEME://$MINIO_ENDPOINT:$MINIO_PORT $ACCESS $SECRET" ; + $MC_COMMAND ; + STATUS=$? ; + until [ $STATUS = 0 ] + do + ATTEMPTS=`expr $ATTEMPTS + 1` ; + echo \"Failed attempts: $ATTEMPTS\" ; + if [ $ATTEMPTS -gt $LIMIT ]; then + exit 1 ; + fi ; + sleep 2 ; # 1 second intervals between attempts + $MC_COMMAND ; + STATUS=$? ; + done ; + set -e ; # reset `e` as active + return 0 + } + + # checkPolicyExists ($policy) + # Check if the policy exists, by using the exit code of `mc admin policy info` + checkPolicyExists() { + POLICY=$1 + CMD=$(${MC} admin policy info myminio $POLICY > /dev/null 2>&1) + return $? + } + + # createPolicy($name, $filename) + createPolicy () { + NAME=$1 + FILENAME=$2 + + # Create the name if it does not exist + echo "Checking policy: $NAME (in /config/$FILENAME.json)" + if ! checkPolicyExists $NAME ; then + echo "Creating policy '$NAME'" + else + echo "Policy '$NAME' already exists." + fi + ${MC} admin policy add myminio $NAME /config/$FILENAME.json + + } + + # Try connecting to MinIO instance + scheme=http + connectToMinio $scheme + custom-command: |- + #!/bin/sh + set -e ; # Have script exit in the event of a failed command. + MC_CONFIG_DIR="/etc/minio/mc/" + MC="/usr/bin/mc --insecure --config-dir ${MC_CONFIG_DIR}" + + # connectToMinio + # Use a check-sleep-check loop to wait for MinIO service to be available + connectToMinio() { + SCHEME=$1 + ATTEMPTS=0 ; LIMIT=29 ; # Allow 30 attempts + set -e ; # fail if we can't read the keys. + ACCESS=$(cat /config/rootUser) ; SECRET=$(cat /config/rootPassword) ; + set +e ; # The connections to minio are allowed to fail. 
+ echo "Connecting to MinIO server: $SCHEME://$MINIO_ENDPOINT:$MINIO_PORT" ; + MC_COMMAND="${MC} alias set myminio $SCHEME://$MINIO_ENDPOINT:$MINIO_PORT $ACCESS $SECRET" ; + $MC_COMMAND ; + STATUS=$? ; + until [ $STATUS = 0 ] + do + ATTEMPTS=`expr $ATTEMPTS + 1` ; + echo \"Failed attempts: $ATTEMPTS\" ; + if [ $ATTEMPTS -gt $LIMIT ]; then + exit 1 ; + fi ; + sleep 2 ; # 1 second intervals between attempts + $MC_COMMAND ; + STATUS=$? ; + done ; + set -e ; # reset `e` as active + return 0 + } + + # runCommand ($@) + # Run custom mc command + runCommand() { + ${MC} "$@" + return $? + } + + # Try connecting to MinIO instance + scheme=http + connectToMinio $scheme +--- +# Source: coder-observability/charts/loki/templates/config.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: loki + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm +data: + config.yaml: |2 + auth_enabled: false + chunk_store_config: + chunk_cache_config: + background: + writeback_buffer: 500000 + writeback_goroutines: 1 + writeback_size_limit: 500MB + default_validity: 0s + memcached: + batch_size: 4 + parallelism: 5 + memcached_client: + addresses: dnssrvnoa+_memcached-client._tcp.loki-chunks-cache.coder-observability.svc + consistent_hash: true + max_idle_conns: 72 + timeout: 2000ms + common: + compactor_address: 'http://loki-backend:3100' + path_prefix: /var/loki + replication_factor: 1 + storage: + s3: + access_key_id: enterprise-logs + bucketnames: chunks + endpoint: loki-storage.coder-observability.svc:9000 + insecure: true + s3forcepathstyle: true + secret_access_key: supersecret + frontend: + scheduler_address: "" + tail_proxy_url: "" + frontend_worker: + scheduler_address: "" + index_gateway: + mode: simple + limits_config: + max_cache_freshness_per_query: 10m + query_timeout: 300s + reject_old_samples: true + reject_old_samples_max_age: 168h + split_queries_by_interval: 15m + 
volume_enabled: true + memberlist: + join_members: + - loki-memberlist + pattern_ingester: + enabled: false + query_range: + align_queries_with_step: true + cache_results: true + results_cache: + cache: + background: + writeback_buffer: 500000 + writeback_goroutines: 1 + writeback_size_limit: 500MB + default_validity: 12h + memcached_client: + addresses: dnssrvnoa+_memcached-client._tcp.loki-results-cache.coder-observability.svc + consistent_hash: true + timeout: 500ms + update_interval: 1m + ruler: + alertmanager_url: http://alertmanager.coder-observability.svc + enable_alertmanager_v2: true + enable_api: true + remote_write: + clients: + fake: + headers: + Source: Loki + remote_timeout: 30s + url: http://prometheus.coder-observability.svc/api/v1/write + enabled: true + ring: + kvstore: + store: inmemory + rule_path: /rules + storage: + local: + directory: /rules + type: local + wal: + dir: /var/loki-ruler-wal + runtime_config: + file: /etc/loki/runtime-config/runtime-config.yaml + schema_config: + configs: + - from: "2024-04-01" + index: + period: 24h + prefix: index_ + object_store: s3 + schema: v13 + store: tsdb + server: + grpc_listen_port: 9095 + http_listen_port: 3100 + http_server_read_timeout: 600s + http_server_write_timeout: 600s + storage_config: + boltdb_shipper: + index_gateway_client: + server_address: dns+loki-backend-headless.coder-observability.svc.cluster.local:9095 + hedging: + at: 250ms + max_per_second: 20 + up_to: 3 + tsdb_shipper: + index_gateway_client: + server_address: dns+loki-backend-headless.coder-observability.svc.cluster.local:9095 + tracing: + enabled: false +--- +# Source: coder-observability/charts/loki/templates/gateway/configmap-gateway.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: loki-gateway + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: gateway +data: + nginx.conf: 
"worker_processes 5; ## Default: 1\nerror_log /dev/stderr;\npid /tmp/nginx.pid;\nworker_rlimit_nofile 8192;\n\nevents {\n worker_connections 4096; ## Default: 1024\n}\n\nhttp {\n client_body_temp_path /tmp/client_temp;\n proxy_temp_path /tmp/proxy_temp_path;\n fastcgi_temp_path /tmp/fastcgi_temp;\n uwsgi_temp_path /tmp/uwsgi_temp;\n scgi_temp_path /tmp/scgi_temp;\n\n client_max_body_size 4M;\n\n proxy_read_timeout 600; ## 10 minutes\n proxy_send_timeout 600;\n proxy_connect_timeout 600;\n\n proxy_http_version 1.1;\n\n default_type application/octet-stream;\n log_format main '$remote_addr - $remote_user [$time_local] $status '\n '\"$request\" $body_bytes_sent \"$http_referer\" '\n '\"$http_user_agent\" \"$http_x_forwarded_for\"';\n access_log /dev/stderr main;\n\n sendfile on;\n tcp_nopush on;\n resolver kube-dns.kube-system.svc.cluster.local.;\n \n\n server {\n listen 8080;\n listen [::]:8080;\n\n location = / {\n return 200 'OK';\n auth_basic off;\n }\n\n ########################################################\n # Configure backend targets# Distributor\n location = /api/prom/push {\n proxy_pass http://loki-write.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /loki/api/v1/push {\n proxy_pass http://loki-write.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /distributor/ring {\n proxy_pass http://loki-write.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /otlp/v1/logs {\n proxy_pass http://loki-write.coder-observability.svc.cluster.local:3100$request_uri;\n }\n\n # Ingester\n location = /flush {\n proxy_pass http://loki-write.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location ^~ /ingester/ {\n proxy_pass http://loki-write.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /ingester {\n internal; # to suppress 301\n }\n\n # Ring\n location = /ring {\n proxy_pass http://loki-write.coder-observability.svc.cluster.local:3100$request_uri;\n }\n\n # 
MemberListKV\n location = /memberlist {\n proxy_pass http://loki-write.coder-observability.svc.cluster.local:3100$request_uri;\n }\n\n # Ruler\n location = /ruler/ring {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /api/prom/rules {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location ^~ /api/prom/rules/ {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /loki/api/v1/rules {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location ^~ /loki/api/v1/rules/ {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /prometheus/api/v1/alerts {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /prometheus/api/v1/rules {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n\n # Compactor\n location = /compactor/ring {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /loki/api/v1/delete {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /loki/api/v1/cache/generation_numbers {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n\n # IndexGateway\n location = /indexgateway/ring {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n\n # QueryScheduler\n location = /scheduler/ring {\n proxy_pass http://loki-backend.coder-observability.svc.cluster.local:3100$request_uri;\n }\n\n # Config\n location = /config {\n proxy_pass http://loki-write.coder-observability.svc.cluster.local:3100$request_uri;\n }\n\n\n # QueryFrontend, Querier\n location = /api/prom/tail {\n proxy_pass 
http://loki-read.coder-observability.svc.cluster.local:3100$request_uri;\n proxy_set_header Upgrade $http_upgrade;\n proxy_set_header Connection \"upgrade\";\n }\n location = /loki/api/v1/tail {\n proxy_pass http://loki-read.coder-observability.svc.cluster.local:3100$request_uri;\n proxy_set_header Upgrade $http_upgrade;\n proxy_set_header Connection \"upgrade\";\n }\n location ^~ /api/prom/ {\n proxy_pass http://loki-read.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /api/prom {\n internal; # to suppress 301\n }\n location ^~ /loki/api/v1/ {\n proxy_pass http://loki-read.coder-observability.svc.cluster.local:3100$request_uri;\n }\n location = /loki/api/v1 {\n internal; # to suppress 301\n }\n }\n}\n" +--- +# Source: coder-observability/charts/loki/templates/monitoring/dashboards/configmap-1.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: loki-dashboards-1 + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + grafana_dashboard: "1" +data: + "loki-chunks.json": | + {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(loki_ingester_memory_chunks{cluster=\"$cluster\", 
job=~\"$namespace/(loki|enterprise-logs)-write\"})","format":"time_series","intervalFactor":2,"legendFormat":"series","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Series","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}) / sum(loki_ingester_memory_streams{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"})","format":"time_series","intervalFactor":2,"legendFormat":"chunks","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Chunks per series","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Active Series / 
Chunks","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"sum(rate(loki_ingester_chunk_utilization_sum{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_utilization_count{cluster=\"$cluster\", 
job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Utilization","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"sum(rate(loki_ingester_chunk_age_seconds_sum{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) * 1e3 / sum(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", 
job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Age","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Flush Stats","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"sum(rate(loki_ingester_chunk_entries_sum{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_entries_count{cluster=\"$cluster\", 
job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Log Entries Per Chunk","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_chunk_store_index_entries_per_chunk_sum{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"Index Entries","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Index Entries Per Chunk","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Flush 
Stats","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"loki_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"} or cortex_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Queue Length","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"1xx":"#EAB839","2xx":"#7EB26D","3xx":"#6ED0E0","4xx":"#EF843C","5xx":"#E24D42","error":"#E24D42","success":"#7EB26D"},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":8,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (status) (\n label_replace(label_replace(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]),\n \"status\", \"${1}xx\", 
\"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n","format":"time_series","intervalFactor":2,"legendFormat":"{{status}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Flush Rate","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Flush Stats","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":9,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Chunks 
Flushed/Second","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":10,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (reason) (rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{reason}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Chunk Flush Reason","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":1,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":1,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Flush 
Stats","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"cards":{"cardPadding":null,"cardRound":null},"color":{"cardColor":"#b4ff00","colorScale":"sqrt","colorScheme":"interpolateSpectral","exponent":0.5,"mode":"spectrum"},"dataFormat":"tsbuckets","datasource":"$datasource","heatmap":{},"hideZeroBuckets":false,"highlightCards":true,"id":11,"legend":{"show":true},"span":12,"targets":[{"expr":"sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))","format":"heatmap","intervalFactor":2,"legendFormat":"{{le}}","refId":"A"}],"title":"Chunk Utilization","tooltip":{"show":true,"showHistogram":true},"type":"heatmap","xAxis":{"show":true},"xBucketNumber":null,"xBucketSize":null,"yAxis":{"decimals":0,"format":"percentunit","show":true,"splitFactor":null},"yBucketBound":"auto"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Utilization","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"cards":{"cardPadding":null,"cardRound":null},"color":{"cardColor":"#b4ff00","colorScale":"sqrt","colorScheme":"interpolateSpectral","exponent":0.5,"mode":"spectrum"},"dataFormat":"tsbuckets","datasource":"$datasource","heatmap":{},"hideZeroBuckets":false,"highlightCards":true,"id":12,"legend":{"show":true},"span":12,"targets":[{"expr":"sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)","format":"heatmap","intervalFactor":2,"legendFormat":"{{le}}","refId":"A"}],"title":"Chunk Size 
Bytes","tooltip":{"show":true,"showHistogram":true},"type":"heatmap","xAxis":{"show":true},"xBucketNumber":null,"xBucketSize":null,"yAxis":{"decimals":0,"format":"bytes","show":true,"splitFactor":null},"yBucketBound":"auto"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Utilization","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":13,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[1m])) by (le))","format":"time_series","intervalFactor":2,"legendFormat":"p99","legendLink":null,"step":10},{"expr":"histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[1m])) by (le))","format":"time_series","intervalFactor":2,"legendFormat":"p90","legendLink":null,"step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[1m])) by (le))","format":"time_series","intervalFactor":2,"legendFormat":"p50","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Chunk Size 
Quantiles","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Utilization","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":14,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) by (le))","format":"time_series","intervalFactor":2,"legendFormat":"p50","legendLink":null,"step":10},{"expr":"histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) by (le))","format":"time_series","intervalFactor":2,"legendFormat":"p99","legendLink":null,"step":10},{"expr":"sum(rate(loki_ingester_chunk_bounds_hours_sum{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) / sum(rate(loki_ingester_chunk_bounds_hours_count{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"avg","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Chunk Duration hours 
(end-start)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Duration","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Chunks","uid":"chunks","version":0} + "loki-deletion.json": | + {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki 
Dashboards","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"height":"100px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"none","id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(loki_compactor_pending_delete_requests_count{cluster=~\"$cluster\", namespace=~\"$namespace\"})","format":"time_series","instant":true,"intervalFactor":2,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Number of Pending Requests","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"dtdurations","id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"max(loki_compactor_oldest_pending_delete_request_age_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"})","format":"time_series","instant":true,"intervalFactor":2,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Oldest Pending Request 
Age","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Headlines","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(increase(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))","format":"time_series","intervalFactor":2,"legendFormat":"received","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Delete Requests Received / Day","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(increase(loki_compactor_delete_requests_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))","format":"time_series","intervalFactor":2,"legendFormat":"processed","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Delete Requests Processed / Day","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Churn","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(increase(loki_compactor_load_pending_requests_attempts_total{status=\"fail\", cluster=~\"$cluster\", namespace=~\"$namespace\"}[1h]))","format":"time_series","intervalFactor":2,"legendFormat":"failures","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Failures in Loading Delete Requests / 
Hour","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Failures","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_compactor_deleted_lines{cluster=~\"$cluster\",job=~\"$namespace/(loki|enterprise-logs)-read\"}[$__rate_interval])) by (user)","format":"time_series","intervalFactor":2,"legendFormat":"{{user}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Lines Deleted / Sec","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Deleted lines","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data 
Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Deletion","uid":"deletion","version":0} + "loki-logs.json": | + {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":8,"iteration":1583185057230,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":0,"y":0},"hiddenSeries":false,"id":35,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(go_goroutines{cluster=\"$cluster\", namespace=\"$namespace\", 
pod=~\"$deployment.*\", pod=~\"$pod\"})","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"goroutines","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":3,"y":0},"hiddenSeries":false,"id":41,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(go_gc_duration_seconds{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}) by (quantile)","legendFormat":"{{quantile}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"gc 
duration","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":6,"y":0},"hiddenSeries":false,"id":36,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", 
container=~\"$container\"}[5m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"cpu","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":9,"y":0},"hiddenSeries":false,"id":40,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"})","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"working 
set","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":12,"y":0},"hiddenSeries":false,"id":38,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", 
pod=~\"$pod\"}[5m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"tx","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":15,"y":0},"hiddenSeries":false,"id":39,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", 
pod=~\"$pod\"}[5m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"rx","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"decbytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":18,"y":0},"hiddenSeries":false,"id":37,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"increase(kube_pod_container_status_last_terminated_reason{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[30m]) \u003e 
0","legendFormat":"{{reason}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"restarts","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":3,"x":21,"y":0},"hiddenSeries":false,"id":42,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\", exported_pod=~\"$deployment.*\", exported_pod=~\"$pod\", container=~\"$container\"}[5m])) by (level)","legendFormat":"{{level}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"bad 
words","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$logs","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":24,"x":0,"y":4},"hiddenSeries":false,"id":31,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"warn","color":"#FF780A"},{"alias":"error","color":"#E02F44"},{"alias":"info","color":"#56A64B"},{"alias":"debug","color":"#3274D9"}],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=~\"$level\" |= \"$filter\" [5m])) by (level)","intervalFactor":3,"legendFormat":"{{level}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Log Rate","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"timeseries","xaxis":{"buckets":null,"mode":"time","name":null,"show":false,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}],"yaxis":{"align":false,"alignLevel":null}},{"datasource":"$logs","gridPos":{"h":19,"w":24,"x":0,"y":6},"id":29,"maxDataPoints":"","options":{"showLabels":false,"showTime":true,"sortOrder":"Descending","wrapLogMessage":true},"targets":[{"expr":"{cluster=\"$cluster\", namespace=\"$namespace\", 
pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"} | logfmt | level=~\"$level\" |= \"$filter\"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Logs","type":"logs"}],"refresh":"10s","rows":[],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"hide":0,"label":null,"name":"logs","options":[],"query":"loki","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"deployment","options":[],"query":"label_values(kube_deployment_created{cluster=\"$cluster\", namespace=\"$namespace\"}, deployment)","refresh":0,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"pod","options":[],"query":"label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\"}, 
pod)","refresh":0,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":false,"label":null,"multi":false,"name":"container","options":[],"query":"label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\", pod=~\"$deployment.*\"}, container)","refresh":0,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"selected":true,"text":"","value":""},"hide":0,"includeAll":false,"label":"","multi":true,"name":"level","options":[{"selected":false,"text":"debug","value":"debug"},{"selected":false,"text":"info","value":"info"},{"selected":false,"text":"warn","value":"warn"},{"selected":false,"text":"error","value":"error"}],"query":"debug,info,warn,error","refresh":0,"type":"custom"},{"current":{"selected":false,"text":"","value":""},"label":"LogQL Filter","name":"filter","query":"","type":"textbox"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Logs","uid":"logs","version":0} + "loki-mixin-recording-rules.json": | + {"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations \u0026 Alerts","target":{"limit":100,"matchAny":false,"tags":[],"type":"dashboard"},"type":"dashboard"},{"datasource":"${datasource}","enable":false,"expr":"sum by (tenant) (changes(loki_ruler_wal_prometheus_tsdb_wal_truncations_total{tenant=~\"${tenant}\"}[$__rate_interval]))","iconColor":"red","name":"WAL Truncations","target":{"queryType":"Azure 
Monitor","refId":"Anno"},"titleFormat":"{{tenant}}"}]},"editable":true,"fiscalYearStartMonth":0,"gnetId":null,"graphTooltip":0,"iteration":1635347545534,"links":[],"liveNow":false,"panels":[{"datasource":"${datasource}","fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"noValue":"0","thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":1}]}},"overrides":[]},"gridPos":{"h":10,"w":2,"x":0,"y":0},"id":2,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"8.3.0-38205pre","targets":[{"datasource":"${datasource}","exemplar":false,"expr":"sum(loki_ruler_wal_appender_ready) by (pod, tenant) == 0","instant":true,"interval":"","legendFormat":"","refId":"A"}],"title":"Appenders Not Ready","type":"stat"},{"datasource":"${datasource}","description":"","fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":10,"w":11,"x":2,"y":0},"id":4,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"targets":[{"datasource":"${datasource}","exemplar":true,"expr":"sum(rate(loki_ruler_wal_samples_appended_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) \u003e 0","interval":"","legendFormat":"{{tenant}}","refId":"A"}],"title":"Samples Appended to WAL per 
Second","type":"timeseries"},{"datasource":"${datasource}","description":"Series are unique combinations of labels","fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":10,"w":11,"x":13,"y":0},"id":5,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"targets":[{"datasource":"${datasource}","exemplar":true,"expr":"sum(rate(loki_ruler_wal_storage_created_series_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) \u003e 0","interval":"","legendFormat":"{{tenant}}","refId":"A"}],"title":"Series Created per Second","type":"timeseries"},{"datasource":"${datasource}","description":"Difference between highest timestamp appended to WAL and highest timestamp successfully written to remote 
storage","fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":10,"w":12,"x":0,"y":10},"id":6,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"targets":[{"datasource":"${datasource}","exemplar":true,"expr":"loki_ruler_wal_prometheus_remote_storage_highest_timestamp_in_seconds{tenant=~\"${tenant}\"}\n- on (tenant)\n (\n loki_ruler_wal_prometheus_remote_storage_queue_highest_sent_timestamp_seconds{tenant=~\"${tenant}\"}\n or vector(0)\n )","interval":"","legendFormat":"{{tenant}}","refId":"A"}],"title":"Write 
Behind","type":"timeseries"},{"datasource":"${datasource}","description":"","fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":10,"w":12,"x":12,"y":10},"id":7,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"targets":[{"datasource":"${datasource}","exemplar":true,"expr":"sum(rate(loki_ruler_wal_prometheus_remote_storage_samples_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) \u003e 0","interval":"","legendFormat":"{{tenant}}","refId":"A"}],"title":"Samples Sent per Second","type":"timeseries"},{"datasource":"${datasource}","description":"\n","fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":10,"w":12,"x":0,"y":20},"id":8,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"targets":[{"datasource":"${datasource}","exemplar":true,"expr":"sum by (tenant) 
(loki_ruler_wal_disk_size{tenant=~\"${tenant}\"})","interval":"","legendFormat":"{{tenant}}","refId":"A"}],"title":"WAL Disk Size","type":"timeseries"},{"datasource":"${datasource}","description":"Some number of pending samples is expected, but if remote-write is failing this value will remain high","fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":10,"w":12,"x":12,"y":20},"id":9,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom"},"tooltip":{"mode":"single"}},"targets":[{"datasource":"${datasource}","exemplar":true,"expr":"max(loki_ruler_wal_prometheus_remote_storage_samples_pending{tenant=~\"${tenant}\"}) by (tenant,pod) \u003e 0","interval":"","legendFormat":"{{tenant}}","refId":"A"}],"title":"Pending Samples","type":"timeseries"}],"schemaVersion":31,"style":"dark","tags":[],"templating":{"list":[{"description":null,"error":null,"hide":0,"includeAll":false,"label":"Datasource","multi":false,"name":"datasource","options":[],"query":"prometheus","queryValue":"","refresh":1,"regex":"","skipUrlSync":false,"type":"datasource"},{"allValue":null,"datasource":"${datasource}","definition":"label_values(loki_ruler_wal_samples_appended_total, tenant)","description":null,"error":null,"hide":0,"includeAll":true,"label":"Tenant","multi":true,"name":"tenant","options":[],"query":{"query":"label_values(loki_ruler_wal_samples_appended_total, 
tenant)","refId":"StandardVariableQuery"},"refresh":2,"regex":"","skipUrlSync":false,"sort":0,"type":"query"}]},"time":{"from":"now-6h","to":"now"},"timepicker":{},"timezone":"","title":"Recording Rules","uid":"2xKA_ZK7k","version":9,"weekStart":""} + "loki-operational.json": | + {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":68,"iteration":1588704280892,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"panels":[{"collapsed":false,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":17,"panels":[],"targets":[],"title":"Main","type":"row"},{"aliasColors":{"5xx":"red"},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":4,"x":0,"y":1},"hiddenSeries":false,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", 
\"([a-z]+)\")\n)","legendFormat":"{{status}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Queries/Second","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":10,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{"5xx":"red"},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":4,"x":4,"y":1},"hiddenSeries":false,"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", 
\"([a-z]+)\"))","legendFormat":"{{status}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Pushes/Second","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":10,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":4,"x":12,"y":1},"hiddenSeries":false,"id":2,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"topk(10, sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant))","legendFormat":"{{tenant}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Lines Per Tenant (top 
10)","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":4,"x":16,"y":1},"hiddenSeries":false,"id":4,"legend":{"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"topk(10, sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant)) / 1024 / 1024","legendFormat":"{{tenant}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"MBs Per Tenant (Top 
10)","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":4,"x":20,"y":1},"hiddenSeries":false,"id":24,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"increase(kube_pod_container_status_restarts_total{cluster=\"$cluster\", namespace=\"$namespace\"}[10m]) \u003e 0","hide":false,"interval":"","legendFormat":"{{container}}-{{pod}}","refId":"B"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Container 
Restarts","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":0,"y":6},"hiddenSeries":false,"id":9,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3","legendFormat":".99","refId":"A"},{"expr":"histogram_quantile(0.75, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3","legendFormat":".9","refId":"B"},{"expr":"histogram_quantile(0.5, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3","legendFormat":".5","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Push 
Latency","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":12,"y":6},"hiddenSeries":false,"id":12,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3","legendFormat":".99","refId":"A"},{"expr":"histogram_quantile(0.9, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3","legendFormat":".9","refId":"B"},{"expr":"histogram_quantile(0.5, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3","legendFormat":".5","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Distributor 
Latency","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":0,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":18,"y":6},"hiddenSeries":false,"id":71,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[5m])) by (route)","interval":"","legendFormat":"{{route}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Distributor Success 
Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"decimals":null,"format":"percentunit","label":"","logBase":1,"max":"1","min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":12,"y":11},"hiddenSeries":false,"id":13,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3","legendFormat":".99","refId":"A"},{"expr":"histogram_quantile(0.9, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3","hide":false,"legendFormat":".9","refId":"B"},{"expr":"histogram_quantile(0.5, sum by (le) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3","hide":false,"legendFormat":".5","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Ingester Latency 
Write","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":0,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":18,"y":11},"hiddenSeries":false,"id":72,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\", route=\"/logproto.Pusher/Push\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\"}[5m])) by (route)","interval":"","legendFormat":"{{route}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Ingester Success Rate 
Write","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"decimals":null,"format":"percentunit","label":"","logBase":1,"max":"1","min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":10,"w":12,"x":0,"y":16},"hiddenSeries":false,"id":10,"legend":{"alignAsTable":true,"avg":false,"current":false,"hideEmpty":true,"hideZero":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))","legendFormat":"{{route}}-.99","refId":"A"},{"expr":"histogram_quantile(0.9, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))","legendFormat":"{{route}}-.9","refId":"B"},{"expr":"histogram_quantile(0.5, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", 
route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))","legendFormat":"{{route}}-.5","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Query Latency","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":12,"y":16},"hiddenSeries":false,"id":14,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3","legendFormat":".99-{{route}}","refId":"A"},{"expr":"histogram_quantile(0.9, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", 
route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3","legendFormat":".9-{{route}}","refId":"B"},{"expr":"histogram_quantile(0.5, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3","legendFormat":".5-{{route}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Querier Latency","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":0,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":18,"y":16},"hiddenSeries":false,"id":73,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\", status_code!~\"5[0-9]{2}\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", 
job=~\"($namespace)/(loki|enterprise-logs)-read\"}[5m])) by (route)","interval":"","legendFormat":"{{route}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Querier Success Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"decimals":null,"format":"percentunit","label":"","logBase":1,"max":"1","min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":12,"y":21},"hiddenSeries":false,"id":15,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3","legendFormat":".99-{{route}}","refId":"A"},{"expr":"histogram_quantile(0.9, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 
1e3","legendFormat":".9-{{route}}","refId":"B"},{"expr":"histogram_quantile(0.5, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3","legendFormat":".5-{{route}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Ingester Latency Read","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"custom":{}},"overrides":[]},"fill":0,"fillGradient":0,"gridPos":{"h":5,"w":6,"x":18,"y":21},"hiddenSeries":false,"id":74,"legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", 
route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[5m])) by (route)","interval":"","legendFormat":"{{route}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Ingester Success Rate Read","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"decimals":null,"format":"percentunit","label":"","logBase":1,"max":"1","min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":26},"id":110,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":0,"y":27},"hiddenSeries":false,"id":112,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"topk(10,sum by (tenant, reason) (rate(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[1m])))","interval":"","legendFormat":"{{ tenant }} - {{ reason }}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Discarded 
Lines","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"columns":[],"datasource":"$datasource","fontSize":"100%","gridPos":{"h":8,"w":12,"x":12,"y":27},"id":113,"pageSize":null,"panels":[],"showHeader":true,"sort":{"col":3,"desc":true},"styles":[{"alias":"Time","align":"auto","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"","align":"auto","colorMode":null,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"mappingType":1,"pattern":"tenant","thresholds":[],"type":"string","unit":"short"},{"alias":"","align":"auto","colorMode":null,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"mappingType":1,"pattern":"reason","thresholds":[],"type":"number","unit":"short"},{"alias":"","align":"right","colorMode":null,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"decimals":2,"pattern":"/.*/","thresholds":[],"type":"number","unit":"short"}],"targets":[{"expr":"topk(10, sum by (tenant, reason) (sum_over_time(increase(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[1m])[$__range:1m])))","format":"table","instant":true,"interval":"","legendFormat":"{{ tenant }} - {{ reason }}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Discarded Lines Per 
Interval","transform":"table","type":"table-old"}],"targets":[],"title":"Limits","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":27},"id":23,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":28},"hiddenSeries":false,"id":26,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":false,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":true,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(loki|enterprise-logs)-write.*\"}","intervalFactor":3,"legendFormat":"{{pod}}-{{container}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"CPU 
Usage","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":6,"y":28},"hiddenSeries":false,"id":27,"legend":{"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":false,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":true,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(loki|enterprise-logs)-write.*\"}","instant":false,"intervalFactor":3,"legendFormat":"{{pod}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Memory 
Usage","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$logs","fill":1,"fillGradient":0,"gridPos":{"h":4,"w":12,"x":12,"y":28},"hiddenSeries":false,"id":31,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":false,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"{}","color":"#C4162A"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"} | logfmt | level=\"error\"[1m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Error Log Rate","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":false,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}],"yaxis":{"align":false,"alignLevel":null}},{"datasource":"$logs","gridPos":{"h":18,"w":12,"x":12,"y":32},"id":29,"options":{"showLabels":false,"showTime":false,"sortOrder":"Descending","wrapLogMessage":true},"panels":[],"targets":[{"expr":"{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"} | logfmt | 
level=\"error\"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Logs","type":"logs"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":0,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":35},"hiddenSeries":false,"id":33,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\"}[5m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[5m])) by (route)","interval":"","intervalFactor":1,"legendFormat":"{{route}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Success 
Rate","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":6,"y":35},"hiddenSeries":false,"id":32,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_distributor_ingester_append_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (ingester)","intervalFactor":1,"legendFormat":"{{ingester}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Append Failures By 
Ingester","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":42},"hiddenSeries":false,"id":34,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Bytes 
Received/Second","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":6,"y":42},"hiddenSeries":false,"id":35,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)","intervalFactor":1,"legendFormat":"{{pod}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Lines Received/Second","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Write 
Path","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":29},"id":104,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":0,"y":30},"hiddenSeries":false,"id":106,"legend":{"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"topk(10,sum by (tenant) (loki_ingester_memory_streams{cluster=\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\"}))","interval":"","legendFormat":"{{ tenant }}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Active Streams","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":12,"y":30},"hiddenSeries":false,"id":108,"legend":{"avg":false,"current":false,"hideEmpty":true,"hideZero":true,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"topk(10, sum by (tenant) 
(rate(loki_ingester_streams_created_total{cluster=\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m]) \u003e 0))","interval":"","legendFormat":"{{ tenant }}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Streams Created/Sec","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Streams","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":30},"id":94,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":0,"y":31},"hiddenSeries":false,"id":102,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"De-Dupe Ratio","yaxis":2}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m]))","interval":"","legendFormat":"Chunks","refId":"A"},{"expr":"sum(increase(loki_chunk_store_deduped_chunks_total{cluster=\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m]))/sum(increase(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m])) \u003c 1","interval":"","legendFormat":"De-Dupe Ratio","refId":"B"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Chunks 
Flushed/Sec","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"cards":{"cardPadding":null,"cardRound":null},"color":{"cardColor":"#b4ff00","colorScale":"sqrt","colorScheme":"interpolateSpectral","exponent":0.5,"mode":"spectrum"},"dataFormat":"tsbuckets","datasource":"$datasource","gridPos":{"h":8,"w":12,"x":12,"y":31},"heatmap":{},"hideZeroBuckets":false,"highlightCards":true,"id":100,"legend":{"show":true},"panels":[],"reverseYBuckets":false,"targets":[{"expr":"sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m])) by (le)","format":"heatmap","instant":false,"interval":"","legendFormat":"{{ le }}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Chunk Size Bytes","tooltip":{"show":true,"showHistogram":false},"type":"heatmap","xAxis":{"show":true},"xBucketNumber":null,"xBucketSize":null,"yAxis":{"decimals":0,"format":"bytes","logBase":1,"max":null,"min":null,"show":true,"splitFactor":null},"yBucketBound":"auto","yBucketNumber":null,"yBucketSize":null},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":7,"fillGradient":0,"gridPos":{"h":9,"w":12,"x":0,"y":39},"hiddenSeries":false,"id":96,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(reason) (rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/ingester\", 
namespace=~\"$namespace\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/ingester\", namespace=~\"$namespace\"}[$__rate_interval]))","interval":"","legendFormat":"{{ reason }}"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Chunk Flush Reason %","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":"1","min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"cards":{"cardPadding":null,"cardRound":null},"color":{"cardColor":"#b4ff00","colorScale":"sqrt","colorScheme":"interpolateSpectral","exponent":0.5,"max":null,"min":null,"mode":"spectrum"},"dataFormat":"tsbuckets","datasource":"$datasource","gridPos":{"h":9,"w":12,"x":12,"y":39},"heatmap":{},"hideZeroBuckets":true,"highlightCards":true,"id":98,"legend":{"show":true},"panels":[],"reverseYBuckets":false,"targets":[{"expr":"sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m]))","format":"heatmap","instant":false,"interval":"","legendFormat":"{{ le }}","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Chunk 
Utilization","tooltip":{"show":true,"showHistogram":false},"type":"heatmap","xAxis":{"show":true},"xBucketNumber":null,"xBucketSize":null,"yAxis":{"decimals":0,"format":"percentunit","logBase":1,"max":null,"min":null,"show":true,"splitFactor":null},"yBucketBound":"auto","yBucketNumber":null,"yBucketSize":null}],"targets":[],"title":"Chunks","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":31},"id":64,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":32},"hiddenSeries":false,"id":68,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":false,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":true,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(loki|enterprise-logs)-read.*\"}","intervalFactor":3,"legendFormat":"{{pod}}-{{container}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"CPU 
Usage","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":39},"hiddenSeries":false,"id":69,"legend":{"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":false,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":true,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(loki|enterprise-logs)-read.*\"}","instant":false,"intervalFactor":3,"legendFormat":"{{pod}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Memory 
Usage","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":true,"dashLength":10,"dashes":false,"datasource":"$logs","fill":1,"fillGradient":0,"gridPos":{"h":3,"w":18,"x":12,"y":32},"hiddenSeries":false,"id":65,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":false,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"{}","color":"#F2495C"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"} | logfmt | level=\"error\"[1m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Error Log Rate","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":false,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}],"yaxis":{"align":false,"alignLevel":null}},{"datasource":"$logs","gridPos":{"h":18,"w":18,"x":12,"y":35},"id":66,"options":{"showLabels":false,"showTime":false,"sortOrder":"Descending","wrapLogMessage":true},"panels":[],"targets":[{"expr":"{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"} | logfmt | 
level=\"error\"","refId":"A"}],"timeFrom":null,"timeShift":null,"title":"Logs","type":"logs"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":0,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":46},"hiddenSeries":false,"id":70,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\", status_code!~\"5[0-9]{2}\"}[1m])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}[1m])) by (route)","interval":"","intervalFactor":1,"legendFormat":"{{route}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Success Rate","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Read 
Path","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":32},"id":52,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":30},"hiddenSeries":false,"id":53,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))","intervalFactor":1,"legendFormat":"{{container}}: .99-{{method}}-{{name}}","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))","hide":false,"legendFormat":"{{container}}: .9-{{method}}-{{name}}","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))","hide":false,"legendFormat":"{{container}}: .5-{{method}}-{{name}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Latency By 
Method","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":38},"hiddenSeries":false,"id":54,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_memcache_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, method, name, container)","intervalFactor":1,"legendFormat":"{{container}}: {{status_code}}-{{method}}-{{name}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Status By 
Method","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Memcached","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":33},"id":57,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":31},"hiddenSeries":false,"id":55,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".99-{{operation}}","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".9-{{operation}}","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".5-{{operation}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Latency By 
Operation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":39},"hiddenSeries":false,"id":58,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, status_code, method)","intervalFactor":1,"legendFormat":"{{status_code}}-{{operation}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Status By 
Operation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Consul","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":34},"id":43,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":9},"hiddenSeries":false,"id":41,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".9","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"MutateRows 
Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":6,"y":9},"hiddenSeries":false,"id":46,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))","interval":"","intervalFactor":1,"legendFormat":"99%","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))","interval":"","legendFormat":"90%","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))","interval":"","legendFormat":"50%","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"ReadRows 
Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":12,"y":9},"hiddenSeries":false,"id":44,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))","interval":"","intervalFactor":1,"legendFormat":"99%","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))","interval":"","legendFormat":"90%","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))","interval":"","legendFormat":"50%","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"GetTable 
Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":18,"y":9},"hiddenSeries":false,"id":45,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".9","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"ListTables 
Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":0,"y":16},"hiddenSeries":false,"id":47,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (status_code)","intervalFactor":1,"legendFormat":"{{status_code}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"MutateRows 
Status","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":6,"y":16},"hiddenSeries":false,"id":50,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (status_code)","intervalFactor":1,"legendFormat":"{{status_code}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"ReadRows 
Status","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":12,"y":16},"hiddenSeries":false,"id":48,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (status_code)","intervalFactor":1,"legendFormat":"{{status_code}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"GetTable 
Status","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":7,"w":6,"x":18,"y":16},"hiddenSeries":false,"id":49,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (status_code)","intervalFactor":1,"legendFormat":"{{status_code}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"ListTables Status","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Big 
Table","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":35},"id":60,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":33},"hiddenSeries":false,"id":61,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".99-{{operation}}","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".9-{{operation}}","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".5-{{operation}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Latency By 
Operation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":41},"hiddenSeries":false,"id":62,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_gcs_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)","intervalFactor":1,"legendFormat":"{{status_code}}-{{operation}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Status By 
Method","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"GCS","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":36},"id":76,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"fillGradient":0,"gridPos":{"h":6,"w":6,"x":0,"y":9},"id":82,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(cortex_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Failure 
Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"fillGradient":0,"gridPos":{"h":6,"w":6,"x":6,"y":9},"id":83,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(cortex_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Consumed Capacity 
Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"fillGradient":0,"gridPos":{"h":6,"w":6,"x":12,"y":9},"id":84,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(cortex_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Throttled 
Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"fillGradient":0,"gridPos":{"h":6,"w":6,"x":18,"y":9},"id":85,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(cortex_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Dropped Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":null,"fill":1,"fillGradient":0,"gridPos":{"h":6,"w":6,"x":0,"y":15},"id":86,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, 
sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))","legendFormat":".99","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))","legendFormat":".9","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))","legendFormat":".5","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Query Pages","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":6,"w":9,"x":6,"y":15},"id":87,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".99-{{operation}}","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".9-{{operation}}","refId":"B"},{"expr":"histogram_quantile(.5, 
sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".5-{{operation}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Latency By Operation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":6,"w":9,"x":15,"y":15},"id":88,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(cortex_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)","intervalFactor":1,"legendFormat":"{{status_code}}-{{operation}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Status By 
Method","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Dynamo","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":37},"id":78,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":10},"id":79,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".99-{{operation}}","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".9-{{operation}}","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".5-{{operation}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Latency By 
Operation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":18},"id":80,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_s3_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)","intervalFactor":1,"legendFormat":"{{status_code}}-{{operation}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Status By 
Method","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"S3","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":37},"id":78,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":10},"id":79,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".99-{{operation}}","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".9-{{operation}}","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".5-{{operation}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Latency By 
Operation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":18},"id":80,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_azure_blob_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)","intervalFactor":1,"legendFormat":"{{status_code}}-{{operation}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Status By Method","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"Azure 
Blob","type":"row"},{"collapsed":true,"datasource":null,"gridPos":{"h":1,"w":24,"x":0,"y":37},"id":114,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":10},"id":115,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","intervalFactor":1,"legendFormat":".99-{{operation}}","refId":"A"},{"expr":"histogram_quantile(.9, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".9-{{operation}}","refId":"B"},{"expr":"histogram_quantile(.5, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))","hide":false,"legendFormat":".5-{{operation}}","refId":"C"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Latency By 
Operation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"gridPos":{"h":8,"w":24,"x":0,"y":18},"id":116,"interval":"","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"nullPointMode":"null","options":{"dataLinks":[]},"panels":[],"percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)","intervalFactor":1,"legendFormat":"{{status_code}}-{{operation}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Status By Method","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"targets":[],"title":"BoltDB Shipper","type":"row"}],"refresh":"10s","rows":[],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data 
Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"hide":0,"label":null,"name":"logs","options":[],"query":"loki","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Operational","uid":"operational","version":0} +--- +# Source: coder-observability/charts/loki/templates/monitoring/dashboards/configmap-2.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: loki-dashboards-2 + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + grafana_dashboard: "1" +data: + "loki-reads-resources.json": | + {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki 
Dashboards","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} \u003e 0)","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory (workingset)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory (go heap 
inuse)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"gridPos":{},"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}} - {{device}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Writes","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"gridPos":{},"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}} - {{device}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Reads","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", 
namespace=~\"$namespace\",label_name=~\"(loki|enterprise-logs)-read.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{persistentvolumeclaim}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Space Utilization","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"loki_boltdb_shipper_query_readiness_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}","format":"time_series","intervalFactor":2,"legendFormat":"duration","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Query Readiness Duration","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Read 
path","titleSize":"h6","type":"row"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":8,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":9,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} \u003e 0)","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory (workingset)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":10,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory (go heap 
inuse)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Ingester","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Reads Resources","uid":"reads-resources","version":0} + "loki-reads.json": | + {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki 
Dashboards","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{"1xx":"#EAB839","2xx":"#7EB26D","3xx":"#6ED0E0","4xx":"#EF843C","5xx":"#E24D42","error":"#E24D42","success":"#7EB26D"},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n","format":"time_series","intervalFactor":2,"legendFormat":"{{status}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"QPS","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"})) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"{{ route }} 99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum by (le,route) (job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"})) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"{{ route }} 50th Percentile","refId":"B","step":10},{"expr":"1e3 * sum(job_route:loki_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"}) by (route) / sum(job_route:loki_request_duration_seconds_count:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\", cluster=~\"$cluster\"}) by (route) ","format":"time_series","intervalFactor":2,"legendFormat":"{{ route }} 
Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Read Path","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{"1xx":"#EAB839","2xx":"#7EB26D","3xx":"#6ED0E0","4xx":"#EF843C","5xx":"#E24D42","error":"#E24D42","success":"#7EB26D"},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", 
\"([a-z]+)\"))\n","format":"time_series","intervalFactor":2,"legendFormat":"{{status}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"QPS","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", 
operation=\"Shipper.Query\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"BoltDB Shipper","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Reads","uid":"reads","version":0} + "loki-retention.json": | + 
{"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", 
pod=~\"(loki|enterprise-logs)-read.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} \u003e 0)","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory 
(workingset)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory (go heap inuse)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Resource Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fieldConfig":{"defaults":{"color":{"fixedColor":"blue","mode":"fixed"},"custom":{},"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null}]},"unit":"dateTimeFromNow"}},"fill":1,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as 
zero","options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"text":{},"textMode":"auto"},"percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"} * 1e3","format":"time_series","instant":true,"refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Last Compact and Mark Operation Success","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"stat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}","format":"time_series","intervalFactor":2,"legendFormat":"duration","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Compact and Mark Operations 
Duration","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (status)(rate(loki_boltdb_shipper_compact_tables_operation_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{success}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Compact and Mark Operations Per Status","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Compact and Mark","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":true,"steppedLine":false,"targets":[{"expr":"count by(action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{action}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Processed Tables Per Action","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":8,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":true,"steppedLine":false,"targets":[{"expr":"count by(table,action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\" , action=~\"modified|deleted\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{table}}-{{action}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Modified 
Tables","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":9,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (table)(rate(loki_boltdb_shipper_retention_marker_count_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) \u003e0","format":"time_series","intervalFactor":2,"legendFormat":"{{table}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Marks Creation Rate Per Table","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Per Table Marker","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"short","id":10,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum (increase(loki_boltdb_shipper_retention_marker_count_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[24h]))","format":"time_series","instant":true,"intervalFactor":2,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Marked Chunks (24h)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":11,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / 
sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Mark Table Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"short","id":12,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum (increase(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[24h]))","format":"time_series","instant":true,"intervalFactor":2,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Delete Chunks 
(24h)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":13,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Delete 
Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Sweeper","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":14,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"time() - (loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_time{cluster=~\"$cluster\", namespace=~\"$namespace\"} \u003e 0)","format":"time_series","intervalFactor":2,"legendFormat":"lag","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Sweeper Lag","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":15,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(loki_boltdb_shipper_retention_sweeper_marker_files_current{cluster=~\"$cluster\", namespace=~\"$namespace\"})","format":"time_series","intervalFactor":2,"legendFormat":"count","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Marks Files to Process","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":16,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (status)(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{status}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Delete Rate Per 
Status","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"datasource":"$logs","id":17,"span":12,"targets":[{"expr":"{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}","refId":"A"}],"title":"Compactor Logs","type":"logs"}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Logs","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, 
namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"hide":0,"label":null,"name":"logs","options":[],"query":"loki","refresh":1,"regex":"","type":"datasource"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Retention","uid":"retention","version":0} + "loki-writes-resources.json": | + {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (loki_ingester_memory_streams{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"In-memory 
streams","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", 
pod=~\"(loki|enterprise-logs)-write.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"limit","color":"#E02F44","fill":0}],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10},{"expr":"min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} \u003e 0)","format":"time_series","intervalFactor":2,"legendFormat":"limit","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory 
(workingset)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory (go heap inuse)","tooltip":{"sort":2},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"gridPos":{},"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) 
(container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}} - {{device}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Writes","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"gridPos":{},"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n","format":"time_series","intervalFactor":2,"legendFormat":"{{pod}} - {{device}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk 
Reads","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"gridPos":{},"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"(loki|enterprise-logs)-write.*\"})","format":"time_series","intervalFactor":2,"legendFormat":"{{persistentvolumeclaim}}","legendLink":null,"step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Space Utilization","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Write path","titleSize":"h6","type":"row"}],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data 
Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Writes Resources","uid":"writes-resources","version":0} + "loki-writes.json": | + {"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[{"asDropdown":true,"icon":"external link","includeVars":true,"keepTime":true,"tags":["loki"],"targetBlank":false,"title":"Loki Dashboards","type":"dashboards"}],"refresh":"10s","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{"1xx":"#EAB839","2xx":"#7EB26D","3xx":"#6ED0E0","4xx":"#EF843C","5xx":"#E24D42","error":"#E24D42","success":"#7EB26D"},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n","format":"time_series","intervalFactor":2,"legendFormat":"{{status}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"QPS","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum by (le) (job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"50th 
Percentile","refId":"B","step":10},{"expr":"1e3 * sum(job:loki_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"}) / sum(job:loki_request_duration_seconds_count:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Write Path","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{"1xx":"#EAB839","2xx":"#7EB26D","3xx":"#6ED0E0","4xx":"#EF843C","5xx":"#E24D42","error":"#E24D42","success":"#7EB26D"},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", 
\"([a-z]+)\"))\n","format":"time_series","intervalFactor":2,"legendFormat":"{{status}}","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"QPS","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"99th Percentile","refId":"A","step":10},{"expr":"histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3","format":"time_series","intervalFactor":2,"legendFormat":"50th Percentile","refId":"B","step":10},{"expr":"sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", 
operation=\"WRITE\"}[$__rate_interval]))","format":"time_series","intervalFactor":2,"legendFormat":"Average","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Latency","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"BoltDB Shipper","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["loki"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data Source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(loki_build_info, cluster)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{"text":"prod","value":"prod"},"datasource":"$datasource","hide":0,"includeAll":false,"label":"namespace","multi":false,"name":"namespace","options":[],"query":"label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)","refresh":1,"regex":"","sort":2,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"utc","title":"Loki / Writes","uid":"writes","version":0} +--- +# Source: coder-observability/charts/loki/templates/runtime-configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: loki-runtime + namespace: 
coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm +data: + runtime-config.yaml: | + {} +--- +# Source: coder-observability/charts/prometheus/charts/alertmanager/templates/configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: alertmanager + labels: + app.kubernetes.io/name: alertmanager + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + namespace: coder-observability +data: + alertmanager.yml: | + global: {} + receivers: + - name: default-receiver + route: + group_interval: 5m + group_wait: 10s + receiver: default-receiver + repeat_interval: 3h + templates: + - /etc/alertmanager/*.tmpl +--- +# Source: coder-observability/charts/prometheus/templates/cm.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/component: server + app.kubernetes.io/name: prometheus + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: prometheus + name: prometheus + namespace: coder-observability +data: + allow-snippet-annotations: "false" + alerting_rules.yml: | + {} + alerts: | + {} + prometheus.yml: | + global: + evaluation_interval: 30s + scrape_interval: 1m + scrape_timeout: 10s + rule_files: + - /etc/config/alerts/*.yaml + scrape_configs: [] + alerting: + alertmanagers: + - kubernetes_sd_configs: + - role: pod + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + relabel_configs: + - source_labels: [__meta_kubernetes_namespace] + regex: coder-observability + action: keep + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance] + regex: coder-observability + action: keep + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name] + regex: alertmanager + action: keep + - source_labels: 
[__meta_kubernetes_pod_container_port_number] + regex: "9093" + action: keep + recording_rules.yml: | + {} + rules: | + {} +--- +# Source: coder-observability/templates/configmap-collector.yaml +kind: ConfigMap +apiVersion: v1 +metadata: + name: collector-config + namespace: coder-observability +data: + config.river: "\n// Discover k8s nodes\ndiscovery.kubernetes \"nodes\" {\n role = \"node\"\n}\n\n// Discover k8s pods\ndiscovery.kubernetes \"pods\" {\n role = \"pod\"\n selectors {\n role = \"pod\"\n }\n}\n\ndiscovery.relabel \"pod_logs\" {\n targets = discovery.kubernetes.pods.targets\n \n rule {\n source_labels = [\"__meta_kubernetes_namespace\"]\n target_label = \"namespace\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_name\"]\n target_label = \"pod\"\n }\n // coalesce the following labels and pick the first value; we'll use this to define the \"job\" label\n rule {\n source_labels = [\"__meta_kubernetes_pod_label_app_kubernetes_io_component\", \"app\", \"__meta_kubernetes_pod_container_name\"]\n separator = \"/\"\n target_label = \"__meta_app\"\n action = \"replace\"\n regex = \"^/*([^/]+?)(?:/.*)?$\" // split by the delimiter if it exists, we only want the first one\n replacement = \"${1}\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_namespace\", \"__meta_kubernetes_pod_label_app_kubernetes_io_name\", \"__meta_app\"]\n separator = \"/\"\n target_label = \"job\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_container_name\"]\n target_label = \"container\"\n }\n rule {\n regex = \"__meta_kubernetes_pod_label_(statefulset_kubernetes_io_pod_name|controller_revision_hash)\"\n action = \"labeldrop\"\n }\n rule {\n regex = \"pod_template_generation\"\n action = \"labeldrop\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_phase\"]\n regex = \"Pending|Succeeded|Failed|Completed\"\n action = \"drop\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_node_name\"]\n action = \"replace\"\n target_label = \"node\"\n }\n rule {\n 
action = \"labelmap\"\n regex = \"__meta_kubernetes_pod_annotation_prometheus_io_param_(.+)\"\n replacement = \"__param_$1\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_uid\", \"__meta_kubernetes_pod_container_name\"]\n separator = \"/\"\n action = \"replace\"\n replacement = \"/var/log/pods/*$1/*.log\"\n target_label = \"__path__\"\n }\n rule {\n action = \"replace\"\n source_labels = [\"__meta_kubernetes_pod_container_id\"]\n regex = \"^(\\\\w+):\\\\/\\\\/.+$\"\n replacement = \"$1\"\n target_label = \"tmp_container_runtime\"\n }\n}\n\ndiscovery.relabel \"pod_metrics\" {\n targets = discovery.kubernetes.pods.targets\n \n rule {\n source_labels = [\"__meta_kubernetes_namespace\"]\n target_label = \"namespace\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_name\"]\n target_label = \"pod\"\n }\n // coalesce the following labels and pick the first value; we'll use this to define the \"job\" label\n rule {\n source_labels = [\"__meta_kubernetes_pod_label_app_kubernetes_io_component\", \"app\", \"__meta_kubernetes_pod_container_name\"]\n separator = \"/\"\n target_label = \"__meta_app\"\n action = \"replace\"\n regex = \"^/*([^/]+?)(?:/.*)?$\" // split by the delimiter if it exists, we only want the first one\n replacement = \"${1}\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_namespace\", \"__meta_kubernetes_pod_label_app_kubernetes_io_name\", \"__meta_app\"]\n separator = \"/\"\n target_label = \"job\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_container_name\"]\n target_label = \"container\"\n }\n rule {\n regex = \"__meta_kubernetes_pod_label_(statefulset_kubernetes_io_pod_name|controller_revision_hash)\"\n action = \"labeldrop\"\n }\n rule {\n regex = \"pod_template_generation\"\n action = \"labeldrop\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_phase\"]\n regex = \"Pending|Succeeded|Failed|Completed\"\n action = \"drop\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_node_name\"]\n action = 
\"replace\"\n target_label = \"node\"\n }\n rule {\n action = \"labelmap\"\n regex = \"__meta_kubernetes_pod_annotation_prometheus_io_param_(.+)\"\n replacement = \"__param_$1\"\n }\n // drop ports that do not expose Prometheus metrics, but might otherwise be exposed by a container which *also*\n // exposes an HTTP port which exposes metrics\n rule {\n source_labels = [\"__meta_kubernetes_pod_container_port_name\"]\n regex = \"grpc|http-(memberlist|console)\"\n action = \"drop\"\n }\n // adapted from the Prometheus helm chart\n // https://github.com/prometheus-community/helm-charts/blob/862870fc3c847e32479b509e511584d5283126a3/charts/prometheus/values.yaml#L1070\n rule {\n source_labels = [\"__meta_kubernetes_pod_annotation_prometheus_io_scrape\"]\n action = \"keep\"\n regex = \"true\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_annotation_prometheus_io_scheme\"]\n action = \"replace\"\n regex = \"(https?)\"\n target_label = \"__scheme__\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_annotation_prometheus_io_path\"]\n action = \"replace\"\n target_label = \"__metrics_path__\"\n regex = \"(.+)\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_annotation_prometheus_io_port\", \"__meta_kubernetes_pod_ip\"]\n action = \"replace\"\n regex = \"(\\\\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})\"\n replacement = \"[$2]:$1\"\n target_label = \"__address__\"\n }\n rule {\n source_labels = [\"__meta_kubernetes_pod_annotation_prometheus_io_port\", \"__meta_kubernetes_pod_ip\"]\n action = \"replace\"\n regex = \"(\\\\d+);((([0-9]+?)(\\\\.|$)){4})\"\n replacement = \"$2:$1\"\n target_label = \"__address__\"\n }\n}\n\nlocal.file_match \"pod_logs\" {\n path_targets = discovery.relabel.pod_logs.output\n}\n\nloki.source.file \"pod_logs\" {\n targets = local.file_match.pod_logs.targets\n forward_to = [loki.process.pod_logs.receiver]\n}\n\nloki.process \"pod_logs\" {\n stage.match {\n selector = \"{tmp_container_runtime=\\\"containerd\\\"}\"\n // the 
cri processing stage extracts the following k/v pairs: log, stream, time, flags\n stage.cri {}\n // Set the extract flags and stream values as labels\n stage.labels {\n values = {\n flags = \"\",\n stream = \"\",\n }\n }\n }\n\n // if the label tmp_container_runtime from above is docker parse using docker\n stage.match {\n selector = \"{tmp_container_runtime=\\\"docker\\\"}\"\n // the docker processing stage extracts the following k/v pairs: log, stream, time\n stage.docker {}\n\n // Set the extract stream value as a label\n stage.labels {\n values = {\n stream = \"\",\n }\n }\n }\n\n // drop the temporary container runtime label as it is no longer needed\n stage.label_drop {\n values = [\"tmp_container_runtime\"]\n }\n\n // parse Coder logs and extract level & logger for efficient filtering\n stage.match {\n selector = \"{pod=~\\\"coder.*\\\"}\" // TODO: make configurable\n\n stage.multiline {\n firstline = \"^(?P<ts>\\\\d{4}-\\\\d{2}-\\\\d{2}\\\\s\\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d{3})\"\n max_wait_time = \"10s\"\n }\n\n stage.regex {\n expression = \"^(?P<ts>\\\\d{4}-\\\\d{2}-\\\\d{2}\\\\s\\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d{3})\\\\s\\\\[(?P<level>\\\\w+)\\\\]\\\\s\\\\s(?P<logger>[^:]+):\\\\s(?P<line>.+)\"\n }\n\n stage.timestamp {\n source = \"ts\"\n format = \"2006-01-02 15:04:05.000\"\n action_on_failure = \"fudge\" // rather have inaccurate time than drop the log line\n }\n\n stage.labels {\n values = {\n level = \"\",\n logger = \"\",\n }\n }\n }\n\n forward_to = [loki.write.loki.receiver]\n}\n\nloki.write \"loki\" {\n endpoint {\n url = \"http://loki-gateway.coder-observability.svc/loki/api/v1/push\"\n }\n}\n\nprometheus.scrape \"pods\" {\n targets = discovery.relabel.pod_metrics.output\n forward_to = [prometheus.relabel.pods.receiver]\n\n scrape_interval = \"15s\"\n scrape_timeout = \"12s\"\n}\n\n// These are metric_relabel_configs while discovery.relabel are relabel_configs.\n// See 
https://github.com/grafana/agent/blob/main/internal/converter/internal/prometheusconvert/prometheusconvert.go#L95-L106\nprometheus.relabel \"pods\" {\n forward_to = [prometheus.remote_write.default.receiver]\n\n // Drop kube-state-metrics' labels which clash with ours\n rule {\n source_labels = [\"__name__\", \"container\"]\n regex = \"kube_pod.+;(.+)\"\n target_label = \"container\"\n replacement = \"\"\n }\n rule {\n source_labels = [\"__name__\", \"pod\"]\n regex = \"kube_pod.+;(.+)\"\n target_label = \"pod\"\n replacement = \"\"\n }\n rule {\n source_labels = [\"__name__\", \"namespace\"]\n regex = \"kube_pod.+;(.+)\"\n target_label = \"namespace\"\n replacement = \"\"\n }\n rule {\n source_labels = [\"__name__\", \"exported_container\"]\n // don't replace an empty label\n regex = \"^kube_pod.+;(.+)$\"\n target_label = \"container\"\n replacement = \"$1\"\n }\n rule {\n source_labels = [\"__name__\", \"exported_pod\"]\n // don't replace an empty label\n regex = \"^kube_pod.+;(.+)$\"\n target_label = \"pod\"\n replacement = \"$1\"\n }\n rule {\n source_labels = [\"__name__\", \"exported_namespace\"]\n // don't replace an empty label\n regex = \"^kube_pod.+;(.+)$\"\n target_label = \"namespace\"\n replacement = \"$1\"\n }\n rule {\n regex = \"^(exported_.*|image_.*|container_id|id|uid)$\"\n action = \"labeldrop\"\n }\n}\n\ndiscovery.relabel \"cadvisor\" {\n targets = discovery.kubernetes.nodes.targets\n rule {\n replacement = \"/metrics/cadvisor\"\n target_label = \"__metrics_path__\"\n }\n}\n\nprometheus.scrape \"cadvisor\" {\n targets = discovery.relabel.cadvisor.output\n forward_to = [ prometheus.relabel.cadvisor.receiver ]\n scheme = \"https\"\n tls_config {\n insecure_skip_verify = true\n }\n bearer_token_file = \"/var/run/secrets/kubernetes.io/serviceaccount/token\"\n scrape_interval = \"15s\"\n scrape_timeout = \"12s\"\n}\n\nprometheus.relabel \"cadvisor\" {\n forward_to = [ prometheus.remote_write.default.receiver ]\n\n // Drop empty container labels, 
addressing https://github.com/google/cadvisor/issues/2688\n rule {\n source_labels = [\"__name__\",\"container\"]\n separator = \"@\"\n regex = \"(container_cpu_.*|container_fs_.*|container_memory_.*)@\"\n action = \"drop\"\n }\n // Drop empty image labels, addressing https://github.com/google/cadvisor/issues/2688\n rule {\n source_labels = [\"__name__\",\"image\"]\n separator = \"@\"\n regex = \"(container_cpu_.*|container_fs_.*|container_memory_.*|container_network_.*)@\"\n action = \"drop\"\n }\n // Drop irrelevant series\n rule {\n source_labels = [\"container\"]\n regex = \"^POD$\"\n action = \"drop\"\n }\n // Drop unnecessary labels\n rule {\n source_labels = [\"id\"]\n target_label = \"id\"\n replacement = \"\"\n }\n rule {\n source_labels = [\"job\"]\n target_label = \"job\"\n replacement = \"\"\n }\n rule {\n source_labels = [\"name\"]\n target_label = \"name\"\n replacement = \"\"\n }\n}\n\nprometheus.remote_write \"default\" {\n endpoint {\n url =\"http://prometheus.coder-observability.svc/api/v1/write\"\n\n // drop instance label which unnecessarily adds new series when pods are restarted, since pod IPs are dynamically assigned\n // NOTE: \"__address__\" is mapped to \"instance\", so will contain <hostname>:<port>\n write_relabel_config {\n regex = \"instance\"\n action = \"labeldrop\"\n }\n }\n}" +--- +# Source: coder-observability/templates/configmap-prometheus-alerts.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: metrics-alerts + namespace: coder-observability +data: + coderd.yaml: "groups:\n- name: CPU Usage\n rules:\n \n - alert: CoderdCPUUsage\n expr: max by (pod) (rate(container_cpu_usage_seconds_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}[10m])) / max by(pod) (kube_pod_container_resource_limits{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`, resource=\"cpu\"}) > 0.9\n for: 10m\n annotations:\n summary: The Coder instance {{ $labels.pod }} is using high amounts of CPU, which may impact application performance.\n 
labels:\n severity: critical\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdcpuusage\n - alert: CoderdCPUUsage\n expr: max by (pod) (rate(container_cpu_usage_seconds_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}[10m])) / max by(pod) (kube_pod_container_resource_limits{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`, resource=\"cpu\"}) > 0.8\n for: 10m\n annotations:\n summary: The Coder instance {{ $labels.pod }} is using high amounts of CPU, which may impact application performance.\n labels:\n severity: warning\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdcpuusage\n- name: Memory Usage\n rules:\n \n - alert: CoderdMemoryUsage\n expr: max by (pod) (container_memory_working_set_bytes{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}) / max by (pod) (kube_pod_container_resource_limits{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`, resource=\"memory\"}) > 0.9\n for: 10m\n annotations:\n summary: The Coder instance {{ $labels.pod }} is using high amounts of memory, which may lead to an Out-Of-Memory (OOM) error.\n labels:\n severity: critical\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdmemoryusage\n - alert: CoderdMemoryUsage\n expr: max by (pod) (container_memory_working_set_bytes{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}) / max by (pod) (kube_pod_container_resource_limits{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`, resource=\"memory\"}) > 0.8\n for: 10m\n annotations:\n summary: The Coder instance {{ $labels.pod }} is using high amounts of memory, which may lead to an Out-Of-Memory (OOM) error.\n labels:\n severity: warning\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdmemoryusage\n- name: Pod Restarts\n rules:\n \n - alert: CoderdRestarts\n expr: sum by(pod) 
(increase(kube_pod_container_status_restarts_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}[10m])) > 3\n for: 1m\n annotations:\n summary: The Coder instance {{ $labels.pod }} has restarted multiple times in the last 10m, which may indicate a CrashLoop.\n labels:\n severity: critical\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdrestarts\n - alert: CoderdRestarts\n expr: sum by(pod) (increase(kube_pod_container_status_restarts_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}[10m])) > 1\n for: 1m\n annotations:\n summary: The Coder instance {{ $labels.pod }} has restarted multiple times in the last 10m, which may indicate a CrashLoop.\n labels:\n severity: notify\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdrestarts\n - alert: CoderdRestarts\n expr: sum by(pod) (increase(kube_pod_container_status_restarts_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}[10m])) > 2\n for: 1m\n annotations:\n summary: The Coder instance {{ $labels.pod }} has restarted multiple times in the last 10m, which may indicate a CrashLoop.\n labels:\n severity: warning\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdrestarts\n- name: Coderd Replicas\n rules:\n \n - alert: CoderdReplicas\n expr: sum(up{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}) < 1\n for: 5m\n annotations:\n summary: Number of alive coderd replicas is below the threshold = 1.\n labels:\n severity: critical\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdreplicas\n - alert: CoderdReplicas\n expr: sum(up{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}) < 3\n for: 5m\n annotations:\n summary: Number of alive coderd replicas is below the threshold = 3.\n labels:\n severity: notify\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdreplicas\n - alert: 
CoderdReplicas\n expr: sum(up{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}) < 2\n for: 5m\n annotations:\n summary: Number of alive coderd replicas is below the threshold = 2.\n labels:\n severity: warning\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdreplicas\n- name: Coderd Workspace Build Failures\n rules:\n \n - alert: CoderdWorkspaceBuildFailures\n expr: sum(increase(coderd_workspace_builds_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`, status=\"failed\" }[10m])) > 10\n for: 10m\n annotations:\n summary: Workspace builds have failed multiple times in the last 10m, which may indicate a broken Coder template.\n labels:\n severity: critical\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdworkspacebuildfailures\n - alert: CoderdWorkspaceBuildFailures\n expr: sum(increase(coderd_workspace_builds_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`, status=\"failed\" }[10m])) > 2\n for: 10m\n annotations:\n summary: Workspace builds have failed multiple times in the last 10m, which may indicate a broken Coder template.\n labels:\n severity: notify\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdworkspacebuildfailures\n - alert: CoderdWorkspaceBuildFailures\n expr: sum(increase(coderd_workspace_builds_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`, status=\"failed\" }[10m])) > 5\n for: 10m\n annotations:\n summary: Workspace builds have failed multiple times in the last 10m, which may indicate a broken Coder template.\n labels:\n severity: warning\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdworkspacebuildfailures\n- name: Coderd Ineligible Prebuilds\n rules:\n \n - alert: CoderdIneligiblePrebuilds\n expr: max by (template_name, preset_name) (coderd_prebuilt_workspaces_running - coderd_prebuilt_workspaces_eligible) > 0\n for: 10m\n annotations:\n 
summary: >\n {{ $value }} prebuilt workspace(s) are currently ineligible for claiming for the \"{{ $labels.template_name }}\" template and \"{{ $labels.preset_name }}\" preset.\n This usually indicates that the agent has not started correctly, or is still running its startup scripts after an extended period of time.\n labels:\n severity: notify\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdineligibleprebuilds\n- name: Coderd Unprovisioned Prebuilt Workspaces\n rules:\n \n - alert: CoderdUnprovisionedPrebuiltWorkspaces\n expr: max by (template_name, preset_name) (coderd_prebuilt_workspaces_desired - coderd_prebuilt_workspaces_running) > 0\n for: 10m\n annotations:\n summary: >\n {{ $value }} prebuilt workspace(s) not yet been provisioned for the \"{{ $labels.template_name }}\" template and \"{{ $labels.preset_name }}\" preset.\n labels:\n severity: warn\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#coderdunprovisionedprebuiltworkspaces " + provisionerd.yaml: "groups:\n- name: Provisionerd Replicas\n rules:\n \n - alert: ProvisionerdReplicas\n expr: sum(coderd_provisionerd_num_daemons{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}) < 1\n for: 5m\n annotations:\n summary: Number of alive provisionerd replicas is below the threshold = 1.\n labels:\n severity: critical\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#provisionerdreplicas\n - alert: ProvisionerdReplicas\n expr: sum(coderd_provisionerd_num_daemons{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}) < 3\n for: 5m\n annotations:\n summary: Number of alive provisionerd replicas is below the threshold = 3.\n labels:\n severity: notify\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#provisionerdreplicas\n - alert: ProvisionerdReplicas\n expr: sum(coderd_provisionerd_num_daemons{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}) < 2\n for: 
5m\n annotations:\n summary: Number of alive provisionerd replicas is below the threshold = 2.\n labels:\n severity: warning\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/coderd#provisionerdreplicas " + enterprise.yaml: "groups:\n - name: Licences\n rules:\n \n - alert: CoderLicenseSeats\n expr: 'max(coderd_license_active_users) / max(coderd_license_limit_users) >=1'\n for: 1m\n annotations:\n summary: Your Coder enterprise licence usage is now at {{ $value | humanizePercentage }} capacity.\n labels:\n severity: critical\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/enterprise#coderlicenseseats\n - alert: CoderLicenseSeats\n expr: 'max(coderd_license_active_users) / max(coderd_license_limit_users) >=0.9'\n for: 1m\n annotations:\n summary: Your Coder enterprise licence usage is now at {{ $value | humanizePercentage }} capacity.\n labels:\n severity: warning\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/enterprise#coderlicenseseats " + postgres.yaml: "groups:\n- name: Notifications\n rules:\n \n - alert: PostgresNotificationQueueFillingUp\n expr: pg_pubsub_usage > 0.9\n for: 15m\n annotations:\n summary: The postgres instance {{ $labels.instance }} has a notification that is filling up, which may impact application performance.\n labels:\n severity: critical\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/postgres#postgresnotificationqueuefillingup\n - alert: PostgresNotificationQueueFillingUp\n expr: pg_pubsub_usage > 0.5\n for: 15m\n annotations:\n summary: The postgres instance {{ $labels.instance }} has a notification that is filling up, which may impact application performance.\n labels:\n severity: notify\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/postgres#postgresnotificationqueuefillingup\n - alert: PostgresNotificationQueueFillingUp\n expr: pg_pubsub_usage > 0.8\n for: 15m\n annotations:\n summary: The postgres 
instance {{ $labels.instance }} has a notification that is filling up, which may impact application performance.\n labels:\n severity: warning\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/postgres#postgresnotificationqueuefillingup\n\n- name: Liveness\n rules:\n \n - alert: PostgresDown\n expr: pg_up == 0\n for: 1m\n annotations:\n summary: The postgres instance {{ $labels.instance }} is down!\n labels:\n severity: critical\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/postgres#postgresdown\n\n\n- name: Connections\n rules:\n \n - alert: PostgresConnectionsRunningLow\n expr: sum by (datname, instance) (pg_stat_activity_count) > on () group_left() (pg_settings_max_connections * 0.9)\n for: 5m\n labels:\n summary: The postgres instance {{ $labels.instance }} is running low on connections which may impact application performance.\n severity: critical\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/postgres#postgresconnectionsrunninglow\n - alert: PostgresConnectionsRunningLow\n expr: sum by (datname, instance) (pg_stat_activity_count) > on () group_left() (pg_settings_max_connections * 0.5)\n for: 5m\n labels:\n summary: The postgres instance {{ $labels.instance }} is running low on connections which may impact application performance.\n severity: notify\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/postgres#postgresconnectionsrunninglow\n - alert: PostgresConnectionsRunningLow\n expr: sum by (datname, instance) (pg_stat_activity_count) > on () group_left() (pg_settings_max_connections * 0.8)\n for: 5m\n labels:\n summary: The postgres instance {{ $labels.instance }} is running low on connections which may impact application performance.\n severity: warning\n runbook_url: http://runbook-viewer.coder-observability.svc.cluster.local/postgres#postgresconnectionsrunninglow" +--- +# Source: coder-observability/templates/configmap-runbooks.yaml +kind: ConfigMap 
+apiVersion: v1 +metadata: + name: runbooks + namespace: coder-observability + annotations: + checksum/config: b0c41033d0385ee3d46488f08e85bcef0d939614dcb99194e0c5913dbf0c2c33 +data: + coderd.md: |- + # Coderd Runbooks + + ## CoderdCPUUsage + + The CPU usage of one or more Coder pods has been close to the limit defined for + the deployment. This can cause slowness in the application, workspaces becoming + unavailable, and may lead to the application failing its liveness probes and + being restarted. + + To resolve this issue, increase the CPU limits of the Coder deployment. + + If you find this occurring frequently, you may wish to check your Coder + deployment against [Coder's Reference Architectures](https://coder.com/docs/v2/latest/admin/architectures). + + ## CoderdMemoryUsage + + The memory usage of one or more Coder pods has been close to the limit defined + for the deployment. When the memory usage exceeds the limit, the pod(s) will be + restarted by Kubernetes. This will interrupt all connections to workspaces being + handled by the affected pod(s). + + To resolve this issue, increase the memory limits of the Coder deployment. + + If you find this occurring frequently, check the memory usage over a longer + period of time. If it appears to be increasing monotonically, this is likely a + memory leak and should be considered a bug. + + ## CoderdRestarts + + One or more Coder pods have been restarting multiple times in the last 10 + minutes. This may be due to a number of issues, including: + + - Failure to connect to the configured database: Coder requires a reachable + PostgreSQL database to function. If it fails to connect, you will see an error + similar to the following: + + ```console + [warn] ping postgres: retrying error="dial tcp 10.43.94.60:5432: connect: connection refused" try=3 + ``` + + - Out-Of-Memory (OOM) kills due to memory usage (see [above](#coderdmemoryusage)), + - An unexpected bug causing the application to exit with an error. 
+ + If Coder is not restarting due to excessive memory usage, check the logs: + + 1. Check the logs of the deployment for any errors, + + ```console + kubectl -n <namespace> logs deployment/coder --previous + ``` + + 2. Check any Kubernetes events related to the deployment, + + ```console + kubectl -n <namespace> events --watch + ``` + + ## CoderdReplicas + + One or more Coderd replicas are down. This may cause availability problems and elevated + response times for user and agent API calls. + + To resolve this issue, review the Coder deployment for possible `CrashLoopBackOff` + instances or re-adjust alarm levels based on the actual number of replicas. + + ## CoderdWorkspaceBuildFailures + + A few workspace build errors have been recently observed. + + Review Prometheus metrics to identify failed jobs. Check the workspace build logs + to determine if there is a relationship with a new template version or a buggy + Terraform plugin. + + ## CoderdLicenseSeats + + Your Enterprise license is approaching or has exceeded the number of seats purchased. + + Please contact your Coder sales contact, or visit https://coder.com/contact/sales. + + ## CoderdIneligiblePrebuilds + + Prebuilds only become eligible to be claimed by users once the workspace's agent is a) running and b) all of its startup + scripts have completed. + + If a prebuilt workspace is not eligible, view its agent logs to diagnose the problem. + + ## CoderdUnprovisionedPrebuiltWorkspaces + + The number of running prebuilt workspaces is lower than the desired instances. This could be for several reasons, + ordered by likelihood: + + ### Experiment/License + + The prebuilds feature is currently gated behind an experiment *and* a premium license. + + Ensure that the prebuilds experiment is enabled with `CODER_EXPERIMENTS=workspace-prebuilds`, and that you have a premium + license added. 
+ + ### Preset Validation Issue + + Templates which have prebuilds configured will require a configured preset defined, with ALL of the required parameters + set in the preset. If any of these are missing, or any of the parameters - as defined - fail validation, then the prebuilds + subsystem will refuse to attempt a workspace build. + + Consult the coderd logs for more information; look out for errors or warnings from the prebuilds subsystem. + + ### Template Misconfiguration or Error + + Prebuilt workspaces cannot be provisioned due to some issue at `terraform apply`-time. This could be due to misconfigured + cloud resources, improper authorization, or any number of other issues. + + Visit the Workspaces page, change the search term to `owner:prebuilds`, and view the previously failed builds. The + error will likely be quite obvious. + + ### Provisioner Latency + + If your provisioners are overloaded and cannot process provisioner jobs quickly enough, prebuilt workspaces may be affected. + There is no prioritization at present for prebuilt workspace jobs. + + Ensure your provisioners are appropriately resourced (i.e. you have enough instances) to handle the concurrent build demand. + + ### Use of Workspace Tags + + If you are using `coder_workspace_tags` ([docs](https://coder.com/docs/admin/templates/extending-templates/workspace-tags)) + in your template, chances are you do not have any provisioners running or they are under-resourced (see **Provisioner Latency**). + + Ensure your running provisioners are configured with your desired tags. + + ### Reconciliation Loop Issue + + The prebuilds subsystem runs a _reconciliation loop_ which monitors the state of prebuilt workspaces to ensure the desired + number of instances are present at all times. Workspace Prebuilds is currently a BETA feature and so there could be a bug + in this _reconciliation loop_, which should be reported to Coder. 
+ + Examine your coderd logs for any errors or warnings relating to prebuilds. + postgres.md: | + # Postgres Runbooks + + ## PostgresNotificationQueueFillingUp + + Postgres offers asynchronous notification via the `LISTEN` and `NOTIFY` + commands. Coder depends heavily on this async notification mechanism for routine + functionality. + + When the notification queue fills up, it may be due to a session executing `LISTEN()` and entering a long + transaction. To verify: + + - Check active sessions with `SELECT * FROM pg_stat_activity;`, + - Check the database log for the PID of the session that is preventing cleanup, + - Kill the query: `SELECT pg_terminate_backend(<pid>);` + + For more information, see the PostgreSQL documentation available here: + + - [PostgreSQL documentation on `LISTEN`](https://www.postgresql.org/docs/current/sql-listen.html) + - [PostgreSQL documentation on `NOTIFY`](https://www.postgresql.org/docs/current/sql-notify.html) + + ## PostgresDown + + Postgres is not currently running, which means the Coder control plane will not be able to read or write any state. + Workspaces may continue to work normally but it is recommended to get Postgres back up as quickly as possible. + + ## PostgresConnectionsRunningLow + + PostgreSQL has a `max_connections` setting that determines the maximum number of + concurrent connections. Once this connection limit is reached, no new + connections will be possible. + + To increase the maximum number of concurrent connections, update the `max_connections` + configuration option for your PostgreSQL instance. See the PostgreSQL + documentation for more details. + + **Note:** You may also need to adjust `shared_buffers` after increasing + `max_connections`. Additionally, you may also need to adjust the kernel + configuration value `kernel.shmmax` in `/etc/sysctl.conf` / + `/etc/sysctl.d/`. 
+ + For more information, see: + + - [PostgreSQL Documentation: Server Configuration](https://www.postgresql.org/docs/16/runtime-config-file-locations.html) + - [Tuning your PostgreSQL Server](https://wiki.postgresql.org/wiki/Tuning_Your_PostgreSQL_Server) + provisionerd.md: | + # Provisionerd Runbooks + + ## ProvisionerdReplicas + + One or more Provisioner replicas are down. Workspace builds may be queued and processed more slowly. + + To resolve this issue, review the Coder deployment (Coder provisioner pods) + for possible `CrashLoopBackOff` instances or re-adjust alarm levels based on the actual + number of replicas. +--- +# Source: coder-observability/templates/configmap-sql-exporter.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: sql-exporter-config + namespace: coder-observability +data: + config.yaml: |- + global: + target: + name: postgres + data_source_name: 'postgresql://coder@localhost:5432/coder?sslmode=disable' + collectors: + - notify + collectors: + - collector_name: notify + metrics: + # Add a metric to show the current usage of the Postgres "pub/sub" mechanism + # See https://www.postgresql.org/docs/current/functions-info.html + - metric_name: pg_pubsub_usage + type: gauge + help: "The fraction (0–1) of the asynchronous notification queue's maximum size that is currently occupied by notifications that are waiting to be processed" + static_labels: + hostname: localhost + database: coder + values: [ usage ] + query: | + SELECT pg_notification_queue_usage() AS usage; +--- +# Source: coder-observability/templates/dashboards/configmap-dashboards-coderd.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: dashboards-coderd + namespace: coder-observability +data: + coderd.json: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + 
"matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Down" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 10, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value_and_name", + "wideLayout": false + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(up{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`} == 1) or vector(0)", + "instant": true, + "legendFormat": "Up", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "(count(up{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`} == 0) or vector(0)) > 0", + "hide": false, + "instant": true, + "legendFormat": "Down", + "range": false, + "refId": "B" + } + ], + "title": "Replicas", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + 
"gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 18, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "One or more replicas are required to be running in order to serve the control-plane.\n\nSee [High Availability](https://coder.com/docs/v2/latest/admin/high-availability) for details on how to\nrun multiple `coderd` replicas.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 0.9 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Enabled" + }, + "properties": [ + { + "id": "mappings", + "value": [ + { + "options": { + "0": { + "index": 1, + "text": "No" + }, + "1": { + "index": 0, + "text": "Yes" + } + }, + "type": "value" + }, + { + "options": { + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + } + ] + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 32, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value_and_name", + "wideLayout": false + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, 
+ "expr": "max(coderd_license_user_limit_enabled)", + "instant": true, + "legendFormat": "Enabled", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "(\n max(coderd_license_active_users) / max(coderd_license_limit_users)\n) > 0", + "hide": false, + "instant": false, + "legendFormat": "Usage", + "range": true, + "refId": "B" + } + ], + "title": "Enterprise License", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 33, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "If you would like to try Coder's [Enterprise features](https://coder.com/docs/v2/latest/enterprise), you can [request a trial license](https://coder.com/docs/v2/latest/faqs#how-do-i-add-an-enterprise-license).", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, 
+ { + "color": "green", + "value": 1 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/(Requested|Limit)/" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 0, + 10 + ], + "fill": "dot" + } + }, + { + "id": "custom.fillOpacity", + "value": 5 + }, + { + "id": "custom.drawStyle", + "value": "line" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Requested" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 6 + }, + "id": 25, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (pod) (rate(container_cpu_usage_seconds_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}[$__rate_interval]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max(kube_pod_container_resource_limits{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`, resource=\"cpu\"})", + "hide": false, + "instant": false, + "legendFormat": "Limit", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max(kube_pod_container_resource_requests{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`, resource=\"cpu\"})", + "hide": false, + "instant": false, + 
"legendFormat": "Requested", + "range": true, + "refId": "B" + } + ], + "title": "CPU Usage Seconds", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 6 + }, + "id": 26, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "The cumulative CPU used per core-second. If `coderd` was using a full CPU core, that would be represented as 1 second.\n\nRequests & limits are shown if set.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "red", + "mode": "shades" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Requested" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": 
"red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 6 + }, + "id": 30, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum by (reason) (\n count_over_time(kube_pod_container_status_terminated_reason{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}[$__interval])\n)", + "hide": false, + "instant": false, + "legendFormat": "{{reason}}", + "range": true, + "refId": "C" + } + ], + "title": "Terminations", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.0001 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 2, + "x": 16, + "y": 6 + }, + "id": 34, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(kube_pod_container_status_restarts_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}[$__range]))", + "hide": false, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "B" + } + ], + "title": "Restarts", + "type": "stat" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 6 + }, + "id": 31, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "Pods can be terminated for several reasons:\n- `OOMKilled`: pod exceeded its defined memory limit or was terminated by the OS for using excessive memory (if no limit defined)\n- `Error`: usually attributeable to a configuration problem\n- `Evicted`: pod has been evicted from node for overusing resources and will be rescheduled on another node is possible", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/(Requested|Limit)/" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 0, + 10 + ], + "fill": "dot" + } + }, + { + "id": "custom.fillOpacity", + "value": 5 + }, + { + "id": "custom.drawStyle", + "value": "line" + } + 
] + }, + { + "matcher": { + "id": "byName", + "options": "Requested" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 12 + }, + "id": 29, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max by (pod) (container_memory_working_set_bytes{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`})", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max(kube_pod_container_resource_limits{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`, resource=\"memory\"})", + "hide": false, + "instant": false, + "legendFormat": "Limit", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max(kube_pod_container_resource_requests{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`, resource=\"memory\"})", + "hide": false, + "instant": false, + "legendFormat": "Requested", + "range": true, + "refId": "B" + } + ], + "title": "RAM Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 12 + }, + "id": 28, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": 
false + }, + "content": "This shows the total memory used by each `coderd` container; it is the same metric which the [OOM killer](https://www.kernel.org/doc/gorman/html/understand/understand016.html) uses.\n\nRequests & limits are shown if set.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 100 + }, + { + "color": "red", + "value": 500 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Errors" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 12 + }, + "id": 16, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "quantile(0.5, coder_pubsub_send_latency_seconds)", + "instant": false, + "legendFormat": "Send", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "quantile(0.5, coder_pubsub_receive_latency_seconds)", + "hide": false, + "instant": false, + "legendFormat": "Receive", + "range": true, + "refId": "B" + } + ], + "title": "Pubsub Latency 
(Median)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Errors" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 2, + "x": 16, + "y": 12 + }, + "id": 22, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "(\n sum(increase(coder_pubsub_latency_measure_errs_total[$__range]))\n / count(coder_pubsub_latency_measure_errs_total)\n) or vector(0)", + "hide": false, + "instant": false, + "legendFormat": "Errors", + "range": true, + "refId": "B" + } + ], + "title": "Pubsub Errors", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 12 + }, + "id": 19, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "`coderd` uses Postgres for passing messages between subcomponents for coordination and signalling;\nthis is called \"pubsub\" (or publish-subscribe).\n\nWe measure the time for messages to be sent and received. 
Latencies higher than 500ms will likely lead to\nyour Coder deployment feeling sluggish. High latency is usually an indication that your Postgres server is under-resourced on CPU.\n\nHigh values for median should be concerning,\nwhile the 90th percentile shows the outliers.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 100 + }, + { + "color": "red", + "value": 500 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Errors" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 15 + }, + "id": 21, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "quantile(0.9, coder_pubsub_send_latency_seconds)", + "instant": false, + "legendFormat": "Send", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "quantile(0.9, coder_pubsub_receive_latency_seconds)", + "hide": false, + "instant": false, + "legendFormat": "Receive", + "range": true, + "refId": "B" + } + ], + 
"title": "Pubsub Latency (P90)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 0, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 18 + }, + "id": 35, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by(pod) (rate(coderd_api_requests_processed_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}[$__rate_interval]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "API Requests", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 18 + }, + "id": 36, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + 
"showMiniMap": false + }, + "content": "This shows the number of requests per second each `coderd` replica is handling.\n\nHeavy skewing towards a single `coderd` replica indicates faulty loadbalancing.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-12h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Control Plane", + "uid": "coderd", + "version": 6, + "weekStart": "" + } +--- +# Source: coder-observability/templates/dashboards/configmap-dashboards-prebuilds.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: dashboards-prebuilds + namespace: coder-observability +data: + prebuilds.json: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 10, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "orange", + "index": 2, + "text": "Not enabled" + }, + "1": { + "color": "green", + "index": 0, + "text": "Enabled" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 1, + "text": "Not enabled" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 15, + "options": { + "colorMode": "value", + "graphMode": "none", + 
"justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": { + "valueSize": 15 + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "min(coderd_experiments{experiment=\"workspace-prebuilds\"})", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Experiment enabled?", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "text", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 49, + "interval": "30s", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.3", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(max(coderd_prebuilt_workspaces_desired) by (template_name, preset_name)) or vector(0)", + "instant": true, + "interval": "", + "legendFormat": "Desired", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(max(coderd_prebuilt_workspaces_running) by (template_name, preset_name)) or vector(0)", + "hide": false, + 
"instant": true, + "interval": "", + "legendFormat": "Running", + "range": false, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(max(coderd_prebuilt_workspaces_eligible) by (template_name, preset_name)) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Eligible", + "range": false, + "refId": "E" + } + ], + "title": "Current: Global", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "text", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 48, + "interval": "30s", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.3", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(max by (template_name, preset_name) (coderd_prebuilt_workspaces_created_total)) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Created", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(max by (template_name, preset_name) (coderd_prebuilt_workspaces_failed_total)) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Failed", + 
"range": false, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(max by (template_name, preset_name) (coderd_prebuilt_workspaces_claimed_total)) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Claimed", + "range": false, + "refId": "A" + } + ], + "title": "All Time: Global", + "type": "stat" + }, + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 2, + "panels": [], + "repeat": "template", + "repeatDirection": "h", + "title": "$template", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "text", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 5 + }, + "id": 31, + "interval": "30s", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.3", + "repeat": "preset", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(coderd_prebuilt_workspaces_desired{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)", + "instant": true, + "interval": "", + "legendFormat": "Desired", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": 
"max(coderd_prebuilt_workspaces_running{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Running", + "range": false, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(coderd_prebuilt_workspaces_eligible{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Eligible", + "range": false, + "refId": "E" + } + ], + "title": "Current: $preset", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 10, + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 18, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Desired" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + }, + { + "id": 
"custom.fillOpacity", + "value": 85 + }, + { + "id": "custom.fillBelowTo", + "value": "Running" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Running" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + }, + { + "id": "custom.fillBelowTo", + "value": "Eligible" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Eligible" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 4, + "y": 5 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.3", + "repeat": "preset", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max(coderd_prebuilt_workspaces_desired{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)", + "instant": false, + "interval": "", + "legendFormat": "Desired", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max(coderd_prebuilt_workspaces_running{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Running", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max(coderd_prebuilt_workspaces_eligible{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Eligible", + "range": true, + "refId": "E" + } + ], + "title": "Pool Capacity: $preset", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + 
"uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 10, + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 13, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Created" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Desired" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Running" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Eligible" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Claimed" + }, + "properties": [ + { + "id": 
"color", + "value": { + "fixedColor": "dark-green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 12, + "y": 5 + }, + "id": 38, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.3", + "repeat": "preset", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "floor(max(increase(coderd_prebuilt_workspaces_created_total{template_name=~\"$template\", preset_name=~\"$preset\"}[$__rate_interval]))) or vector(0)", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Created", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "floor(max(increase(coderd_prebuilt_workspaces_failed_total{template_name=~\"$template\", preset_name=~\"$preset\"}[$__rate_interval]))) or vector(0)", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Failed", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "floor(max(increase(coderd_prebuilt_workspaces_claimed_total{template_name=~\"$template\", preset_name=~\"$preset\"}[$__rate_interval]))) or vector(0)", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Claimed", + "range": true, + "refId": "F" + } + ], + "title": "Pool Operations: $preset", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "text", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + 
] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 5 + }, + "id": 1, + "interval": "30s", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.3", + "repeat": "preset", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(coderd_prebuilt_workspaces_created_total{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Created", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(coderd_prebuilt_workspaces_failed_total{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Failed", + "range": false, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(coderd_prebuilt_workspaces_claimed_total{template_name=~\"$template\", preset_name=~\"$preset\"}) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Claimed", + "range": false, + "refId": "A" + } + ], + "title": "All Time: $preset", + "type": "stat" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "allValue": "", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(coderd_prebuilt_workspaces_desired,template_name)", + "hide": 0, + "includeAll": false, + "label": "Template", + "multi": 
false, + "name": "template", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(coderd_prebuilt_workspaces_desired,template_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": "", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(coderd_prebuilt_workspaces_desired{template_name=~\"$template\"},preset_name)", + "hide": 0, + "includeAll": true, + "label": "Preset", + "multi": true, + "name": "preset", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(coderd_prebuilt_workspaces_desired{template_name=~\"$template\"},preset_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-12h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Prebuilds", + "uid": "cej6jysyme22oa", + "version": 13, + "weekStart": "" + } +--- +# Source: coder-observability/templates/dashboards/configmap-dashboards-provisionerd.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: dashboards-provisionerd + namespace: coder-observability +data: + provisionerd.json: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + 
"mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 17, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value_and_name", + "wideLayout": false + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(coderd_provisionerd_num_daemons{pod=~`coder.*`, pod!~`.*provisioner.*`})", + "instant": true, + "legendFormat": "Built-in", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(coderd_provisionerd_num_daemons{pod=~`coder-provisioner.*`, namespace=`coder`})", + "hide": false, + "instant": true, + "legendFormat": "External", + "range": false, + "refId": "B" + } + ], + "title": "Provisioners", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 20, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "Provisioners are responsible for building workspaces.\n\n`coderd` runs built-in provisioners by default. Control this with the `CODER_PROVISIONER_DAEMONS` environment variable or `--provisioner-daemons` flag.\n\nYou can also consider [External Provisioners](https://coder.com/docs/v2/latest/admin/provisioners). 
Running both built-in and external provisioners is perfectly valid,\nalthough dedicated (external) provisioners will generally give the best build performance.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 21, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "(sum(coderd_provisionerd_jobs_current) > 0) or vector(0)", + "instant": false, + "legendFormat": "Current", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(coderd_provisionerd_num_daemons)", + "hide": false, + "instant": true, + "legendFormat": "Capacity", + "range": false, + "refId": "B" + } + ], + "title": "Builds", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 22, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "The maximum number of simultaneous builds is equivalent to the number of `provisionerd` daemons 
running.\n\nThe \"Capacity\" panel shows the how many simultaneous builds are possible.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 7 + }, + "id": 23, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "histogram_quantile(0.5, sum by(le) (rate(coderd_provisionerd_job_timings_seconds_bucket[$__range])))", + "hide": false, + "instant": true, + "legendFormat": "Median", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "histogram_quantile(0.9, sum by(le) (rate(coderd_provisionerd_job_timings_seconds_bucket[$__range])))", + "hide": false, + "instant": true, + "legendFormat": "90th Percentile", + "range": false, + "refId": "A" + } + ], + "title": "Build Times", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 7 + }, + "id": 24, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "This shows the median and 90th 
percentile workspace build times.\n\nLong build times can impede developers' productivity while they wait for workspaces to start or be created.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + }, + { + "id": "displayName", + "value": "Failure" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + }, + { + "id": "displayName", + "value": "Success" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 7 + }, + "id": 25, + "interval": "1h", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.4.0", + 
"targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (status) (increase(coderd_provisionerd_job_timings_seconds_count[$__interval]))", + "hide": false, + "instant": false, + "interval": "1h", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Build Count Per Hour", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 7 + }, + "id": 26, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "_NOTE: this will not show the current hour._", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/(Limit|Requested)/" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": 
"custom.fillOpacity", + "value": 5 + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 0, + 10 + ], + "fill": "dot" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Requested" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 14 + }, + "id": 28, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (pod) (rate(container_cpu_usage_seconds_total{pod=~`coder-provisioner.*`, namespace=`coder`}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(kube_pod_container_resource_limits{pod=~`coder-provisioner.*`, namespace=`coder`, resource=\"cpu\"})", + "hide": false, + "instant": false, + "legendFormat": "Limit", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(kube_pod_container_resource_requests{pod=~`coder-provisioner.*`, namespace=`coder`, resource=\"cpu\"})", + "hide": false, + "instant": false, + "legendFormat": "Requested", + "range": true, + "refId": "C" + } + ], + "title": "CPU Usage Seconds", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + 
"gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 14 + }, + "id": 30, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "The cumulative CPU used per core-second. If the process was using a full CPU core, that would be represented as 1 second.\n\nRequests & limits are shown if set.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/(Limit|Requested)/" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": "custom.fillOpacity", + "value": 5 + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 0, + 10 + ], + "fill": "dot" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Requested" + }, + "properties": [ + { + 
"id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 14 + }, + "id": 29, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max by (pod) (container_memory_working_set_bytes{pod=~`coder-provisioner.*`, namespace=`coder`})", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(kube_pod_container_resource_limits{pod=~`coder-provisioner.*`, namespace=`coder`, resource=\"memory\"})", + "hide": false, + "instant": false, + "legendFormat": "Limit", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(kube_pod_container_resource_requests{pod=~`coder-provisioner.*`, namespace=`coder`, resource=\"memory\"})", + "hide": false, + "instant": false, + "legendFormat": "Requested", + "range": true, + "refId": "C" + } + ], + "title": "RAM Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 14 + }, + "id": 31, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "This shows the total memory used by each container; it is the same metric which the [OOM killer](https://www.kernel.org/doc/gorman/html/understand/understand016.html) uses.\n\nRequests & limits are shown if set.", + "mode": "markdown" + }, + 
"pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "gridPos": { + "h": 18, + "w": 18, + "x": 0, + "y": 21 + }, + "id": 27, + "options": { + "dedupStrategy": "exact", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": true, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "editorMode": "code", + "expr": "{namespace=~`(coder|coder)`, logger=~\"(.*runner|terraform|provisioner.*)\"}", + "queryType": "range", + "refId": "A" + } + ], + "title": "Logs", + "type": "logs" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 21 + }, + "id": 32, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "This panel shows all logs across built-in and [external provisioners](https://coder.com/docs/v2/latest/admin/provisioners).", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-12h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Provisioners", + "uid": "provisionerd", + "version": 10, + "weekStart": "" + } +--- +# Source: coder-observability/templates/dashboards/configmap-dashboards-status.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: dashboards-status + namespace: coder-observability +data: + status.json: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": false, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 
100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 9, + "title": "Application", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Down" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 10, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value_and_name", + "wideLayout": false + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(up{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`} == 1) or vector(0) > 0", + "instant": true, + "legendFormat": "Up", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(up{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`} == 0) or vector(0) > 0", + "hide": false, + "instant": true, + "legendFormat": "Down", + "range": false, + "refId": "B" + } + ], + "title": 
"Coder Replicas", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 16, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value_and_name", + "wideLayout": false + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(coderd_provisionerd_num_daemons{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`})", + "instant": true, + "legendFormat": "Built-in", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(coderd_provisionerd_num_daemons{pod=~`coder-provisioner.*`, namespace=`coder`})", + "hide": false, + "instant": true, + "legendFormat": "External", + "range": false, + "refId": "B" + } + ], + "title": "Provisioners", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + }, + { + "id": 
"displayName", + "value": "Failed" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + }, + { + "id": "displayName", + "value": "Success" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 17, + "options": { + "displayLabels": [ + "name", + "value" + ], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "round(sum by (status) (increase(coderd_provisionerd_job_timings_seconds_count{pod!=``}[$__range])))", + "instant": true, + "legendFormat": "{{status}}", + "range": false, + "refId": "A" + } + ], + "title": "Workspace Builds", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 18, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + 
"exemplar": false, + "expr": "count(kube_pod_status_ready{condition=\"true\", namespace=`coder-workspaces`} == 1)\nor\ncount(coderd_api_workspace_latest_build{status=\"running\"})\nor\nvector(0)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Running Workspaces", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Down" + }, + "1": { + "color": "green", + "index": 0, + "text": "Up" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + "color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*RAM/" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 16, + "y": 1 + }, + "id": 15, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(\n max_over_time(\n 
rate(container_cpu_usage_seconds_total{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}[1h:1m])\n [$__range:]\n )\n)", + "instant": true, + "legendFormat": "Control Plane CPU", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(\n max_over_time(\n rate(container_cpu_usage_seconds_total{pod=~`coder-provisioner.*`, namespace=`coder`}[1h:1m])\n [$__range:]\n )\n)", + "hide": false, + "instant": true, + "legendFormat": "Provisioner CPU", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(\n max_over_time(\n container_memory_working_set_bytes{pod=~`coder.*`, pod!~`.*provisioner.*`, namespace=`coder`}\n [$__range:]\n )\n)", + "hide": false, + "instant": true, + "legendFormat": "Control Plane RAM", + "range": false, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(\n max_over_time(\n container_memory_working_set_bytes{pod=~`coder-provisioner.*`, namespace=`coder`}\n [$__range:]\n )\n)", + "hide": false, + "instant": true, + "legendFormat": "Provisioner RAM", + "range": false, + "refId": "D" + } + ], + "title": "Resource Usage High Watermark (Cumulative)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Down" + }, + "1": { + "color": "green", + "index": 0, + "text": "Up" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + 
"color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 19, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "min(pg_up) or vector(0)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Postgres", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 3, + "panels": [], + "title": "Observability Tools", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Down" + }, + "1": { + "color": "green", + "index": 0, + "text": "Up" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + "color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + 
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 9 + }, + "id": 1, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "min(up{job=\"coder-observability/prometheus/server\"}) or vector(0)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Prometheus", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Down" + }, + "1": { + "color": "green", + "index": 0, + "text": "Up" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + "color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 9 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": 
"auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "min(up{job=\"coder-observability/loki/write\"}) or vector(0)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Loki Write Path", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Down" + }, + "1": { + "color": "green", + "index": 0, + "text": "Up" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + "color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 9 + }, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": 
"min(up{job=\"coder-observability/loki/read\"}) or vector(0)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Loki Read Path", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Down" + }, + "1": { + "color": "green", + "index": 0, + "text": "Up" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + "color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 9 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "min(up{job=\"coder-observability/loki/backend\", container=\"loki\"}) or vector(0)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Loki Backend", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + 
"defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Down" + }, + "1": { + "color": "green", + "index": 0, + "text": "Up" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + "color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 9 + }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "min(up{job=\"coder-observability/loki/canary\"}) or vector(0)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Loki Canary", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Down" + }, + "1": { + "color": "green", + "index": 0, + "text": "Up" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" 
+ } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + "color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 9 + }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "min(up{job=\"coder-observability/grafana-agent/grafana-agent\"}) or vector(0)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Grafana Agent", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Unhealthy" + }, + "1": { + "color": "green", + "index": 0, + "text": "Healthy" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + "color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { 
+ "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 14 + }, + "id": 12, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "prometheus_config_last_reload_successful{job=\"coder-observability/prometheus/server\"}", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Prometheus Config", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Unhealthy" + }, + "1": { + "color": "green", + "index": 0, + "text": "Healthy" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + "color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 14 + }, + "id": 14, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": 
"auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "min(loki_runtime_config_last_reload_successful) or vector(0)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Loki Config", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Unhealthy" + }, + "1": { + "color": "green", + "index": 0, + "text": "Healthy" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "orange", + "index": 2, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "empty", + "result": { + "color": "orange", + "index": 3, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 4, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 14 + }, + "id": 13, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": 
"code", + "exemplar": false, + "expr": "min(agent_config_last_load_successful{job=\"coder-observability/grafana-agent/grafana-agent\"}) or vector(0)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Grafana Agent Config", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Retention Limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Write-Ahead Log" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Storage" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#f9f9fb", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 14 + }, + "id": 11, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "(\n prometheus_tsdb_wal_storage_size_bytes{job=\"coder-observability/prometheus/server\"} +\n prometheus_tsdb_storage_blocks_bytes{job=\"coder-observability/prometheus/server\"} +\n 
prometheus_tsdb_symbol_table_size_bytes{job=\"coder-observability/prometheus/server\"}\n)\n/\nprometheus_tsdb_retention_limit_bytes{job=\"coder-observability/prometheus/server\"}", + "instant": false, + "legendFormat": "Retention limit used", + "range": true, + "refId": "A" + } + ], + "title": "Prometheus Storage", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 14 + }, + "id": 20, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": { + "titleSize": 20, + "valueSize": 35 + }, + "textMode": "auto", + "wideLayout": false + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{namespace=\"coder-observability\", resource=\"cpu\"})", + "hide": false, + "instant": true, + "legendFormat": "Requested", + "range": false, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(\n max_over_time(\n rate(container_cpu_usage_seconds_total{namespace=\"coder-observability\"}[$__rate_interval])\n [$__range:]\n )\n)", + "hide": false, + "instant": true, + "legendFormat": "High Watermark", + "range": false, + "refId": "D" + } + ], + "title": "CPU", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": 
{ + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 14 + }, + "id": 21, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "center", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": { + "titleSize": 20, + "valueSize": 35 + }, + "textMode": "value_and_name", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{namespace=\"coder-observability\", resource=\"memory\"})", + "hide": false, + "instant": true, + "legendFormat": "Requested", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(\n max_over_time(container_memory_working_set_bytes{namespace=\"coder-observability\"}[$__range])\n)", + "instant": true, + "legendFormat": "High Watermark", + "range": false, + "refId": "A" + } + ], + "title": "RAM", + "type": "stat" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Status", + "uid": "coder-status", + "version": 1, + "weekStart": "" + } +--- +# Source: coder-observability/templates/dashboards/configmap-dashboards-workspace_detail.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: dashboards-workspace-detail + namespace: coder-observability +data: + workspaces-detail.json: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + 
"datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "description": "", + "gridPos": { + "h": 1.2, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 28, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "**HINT**: use the dropdowns above to filter by specific workspace(s).", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "CPUs Requested" + }, + "properties": [ + { + "id": "unit", + "value": "none" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "RAM Requested" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "PVC Capacity" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 20, + "x": 0, + "y": 1.2 + }, + "id": 29, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "center", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/.*/", + "values": false + }, + "showPercentChange": false, + "text": { + "titleSize": 20, + "valueSize": 40 + }, + "textMode": "value_and_name", + "wideLayout": false + }, + "pluginVersion": "10.4.0", + "targets": [ 
+ { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "group by (template_name) (coderd_agents_up{workspace_name=~\"$workspace_name\"})", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "Template Name", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "group by (template_version) (coderd_agents_up{workspace_name=~\"$workspace_name\"})", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "Template Version", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "group by (username) (coderd_agents_up{workspace_name=~\"$workspace_name\"})", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "Owner", + "range": false, + "refId": "C" + } + ], + "title": "Details", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value #A": true, + "Value #B": true, + "Value #C": true, + "Value #D": true + }, + "includeByName": {}, + "indexByName": { + "CPUs Requested": 7, + "PVC Capacity": 9, + "RAM Requested": 8, + "Time": 0, + "Value #A": 5, + "Value #B": 3, + "Value #C": 6, + "template_name": 2, + "template_version": 4, + "username": 1 + }, + "renameByName": { + "Value #C": "", + "lifecycle_state": "Agent State", + "template_name": "Template", + "template_version": "Template Version", + "username": "Owner" + } + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 1.2 + }, + "id": 38, + "links": [ + { + "title": "Provisioners Dashboard", + "url": 
"/d/provisionerd/provisioners?${__url_time_range}" + } + ], + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "Essential information about the selected workspace.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "CPUs Requested" + }, + "properties": [ + { + "id": "unit", + "value": "none" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "RAM Requested" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "PVC Capacity" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 20, + "x": 0, + "y": 5.2 + }, + "id": 36, + "options": { + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ], + "fields": "/.*/" + }, + "orientation": "vertical", + "textMode": "value_and_name", + "wideLayout": false, + "colorMode": "none", + "graphMode": "none", + "justifyMode": "center", + "showPercentChange": false, + "text": { + "titleSize": 20, + "valueSize": 40 + } + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{pod=~\".*$workspace_name.*\", namespace=`coder-workspaces`, resource=\"cpu\"})", + "format": "time_series", + "hide": false, + "instant": true, + "legendFormat": "CPUs Requested", + "range": false, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{pod=~\".*$workspace_name.*\", namespace=`coder-workspaces`, resource=\"memory\"})", + "format": "time_series", + "hide": false, + "instant": true, + "legendFormat": "RAM Requested", + "range": false, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(\n kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~\".*$workspace_name.*\",namespace=`coder-workspaces`}\n * on(persistentvolumeclaim) group_right\n group by (persistentvolumeclaim, persistentvolume) (\n label_replace(\n kube_persistentvolume_claim_ref,\n \"persistentvolumeclaim\",\n \"$1\",\n \"name\",\n \"(.+)\"\n )\n )\n * on (persistentvolume)\n kube_persistentvolume_capacity_bytes\n)", + "format": "time_series", + "hide": false, + "instant": true, + "legendFormat": "PVC Capacity", + "range": false, + "refId": "F" + } + ], + "title": "Resources", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value #A": true, + "Value #B": true, + "Value #C": true, + "Value #D": true + }, + "includeByName": {}, + "indexByName": { + "CPUs Requested": 7, + "PVC Capacity": 9, + "RAM Requested": 8, + "Time": 0, + "Value #A": 5, + "Value #B": 3, + "Value #C": 6, + "template_name": 2, + "template_version": 4, + "username": 1 + }, + "renameByName": { + "Value #C": "", + "lifecycle_state": "Agent State", + "template_name": "Template", + "template_version": "Template Version", + "username": "Owner" + } + } + } + ], + "type": "stat", + "description": "" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "created": { + "color": "light-blue", + "index": 1, + "text": "Created" + }, + "off": { + "color": 
"text", + "index": 8, + "text": "Off" + }, + "ready": { + "color": "green", + "index": 0, + "text": "Ready" + }, + "shutdown_error": { + "color": "red", + "index": 7, + "text": "Shutdown Error" + }, + "shutdown_timeout": { + "color": "purple", + "index": 6, + "text": "Shutdown Timeout" + }, + "shutting_down": { + "color": "light-purple", + "index": 5, + "text": "Shutting Down" + }, + "start_error": { + "color": "red", + "index": 4, + "text": "Start Error" + }, + "start_timeout": { + "color": "orange", + "index": 3, + "text": "Start Timeout" + }, + "starting": { + "color": "super-light-green", + "index": 2, + "text": "Starting" + } + }, + "type": "value" + }, + { + "options": { + "match": "empty", + "result": { + "color": "text", + "index": 9, + "text": "Unknown" + } + }, + "type": "special" + }, + { + "options": { + "match": "null", + "result": { + "color": "text", + "index": 10, + "text": "Unknown" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 9.2 + }, + "id": 35, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^lifecycle_state$/", + "values": false + }, + "showPercentChange": false, + "text": { + "valueSize": 50 + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max by (lifecycle_state) (coderd_agents_connections{workspace_name=~\"$workspace_name\"})", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "D" + } + ], + "title": "Agent Lifecycle State", + "type": "stat" + }, + { + "datasource": { + 
"type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "-1": { + "color": "light-orange", + "index": 0, + "text": "Not completed yet" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 60 + }, + { + "color": "red", + "value": 120 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 4, + "y": 9.2 + }, + "id": 33, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^Value$/", + "values": false + }, + "showPercentChange": false, + "text": { + "valueSize": 50 + }, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(coderd_agentstats_startup_script_seconds{workspace_name=~\"$workspace_name\"}) or vector(-1)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "C" + } + ], + "title": "Agent Startup Script Execution Time", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 7, + "y": 9.2 + }, + "id": 39, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/.*/", + 
"values": false + }, + "showPercentChange": false, + "text": { + "titleSize": 20, + "valueSize": 50 + }, + "textMode": "value_and_name", + "wideLayout": false + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max by (app) (\n label_replace(\n {workspace_name=~\"$workspace_name\", __name__=~\"coderd_agentstats_session_count_.*\"},\n \"app\",\n \"$1\",\n \"__name__\",\n \"coderd_agentstats_session_count_(.*)\"\n )\n)>0", + "format": "time_series", + "hide": false, + "instant": true, + "legendFormat": "{{app}}", + "range": false, + "refId": "C" + } + ], + "title": "App Session Counts", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": {} + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*Bytes/" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 10, + "x": 10, + "y": 9.2 + }, + "id": 34, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "center", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/.*/", + "values": false + }, + "showPercentChange": false, + "text": { + "titleSize": 20, + "valueSize": 50 + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + 
"editorMode": "code", + "exemplar": false, + "expr": "max(coderd_agents_connection_latencies_seconds{workspace_name=~\"$workspace_name\"})", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "Connection Latency", + "range": false, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(sum by (pod) (sum_over_time(coderd_agentstats_rx_bytes{workspace_name=~\"$workspace_name\"}[$__range])))", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "Received Bytes", + "range": false, + "refId": "rx" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(sum by (pod) (sum_over_time(coderd_agentstats_tx_bytes{workspace_name=~\"$workspace_name\"}[$__range])))", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "Transmitted Bytes", + "range": false, + "refId": "tx" + } + ], + "title": "Networking", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": { + "Value #A": "Received Bytes", + "Value #B": "Transmitted Bytes", + "Value #C": "Connection Latency", + "Value #rx": "Received Bytes", + "Value #tx": "Transmitted Bytes" + } + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 9.2 + }, + "id": 40, + "links": [ + { + "title": "Provisioners Dashboard", + "url": "/d/provisionerd/provisioners?${__url_time_range}" + } + ], + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "Essential information about this workspace's agent.\n\nRead more about the agent 
[here](https://coder.com/docs/v2/latest/about/architecture#agents).", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "status" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "mappings", + "value": [ + { + "options": { + "failed": { + "color": "orange", + "index": 1, + "text": "Failure" + }, + "success": { + "color": "green", + "index": 0, + "text": "Success" + } + }, + "type": "value" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Workspace Transition" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "mappings", + "value": [ + { + "options": { + "DESTROY": { + "color": "red", + "index": 0 + }, + "START": { + "color": "blue", + "index": 1 + }, + "STOP": { + "color": "purple", + "index": 2 + } + }, + "type": "value" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 20, + "x": 0, + "y": 15.2 + }, + "id": 6, + "interval": "", + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": true, + "fields": [], + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Time" + } + ] + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", 
+ "exemplar": false, + "expr": "sum by (workspace_name, workspace_owner, status, template_name, template_version, workspace_transition) (\n # Since new series are created and are initially set to a value of 1, we cannot use \"increase\" (because an increase from to 1 does not yield 1).\n # So we compare the current series to an interval ago to see if we have any new series and then sum the series we find. \n ((\n coderd_workspace_builds_total{workspace_name=~\"$workspace_name\"} - \n coderd_workspace_builds_total{workspace_name=~\"$workspace_name\"} offset $__interval\n ) >= 0) \n or coderd_workspace_builds_total{workspace_name=~\"$workspace_name\"}\n) > 0", + "format": "table", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Build Log", + "transformations": [ + { + "disabled": true, + "id": "groupBy", + "options": { + "fields": { + "Count": { + "aggregations": [ + "sum" + ], + "operation": "aggregate" + }, + "Status": { + "aggregations": [], + "operation": "groupby" + }, + "Template Name": { + "aggregations": [], + "operation": "groupby" + }, + "Template Version": { + "aggregations": [], + "operation": "groupby" + }, + "Total": { + "aggregations": [ + "sum" + ], + "operation": "aggregate" + }, + "Value": { + "aggregations": [ + "sum" + ], + "operation": "aggregate" + }, + "Workspace Name": { + "aggregations": [], + "operation": "groupby" + }, + "Workspace Ownert": { + "aggregations": [], + "operation": "groupby" + }, + "Workspace Transition": { + "aggregations": [], + "operation": "groupby" + }, + "status": { + "aggregations": [], + "operation": "groupby" + }, + "template_name": { + "aggregations": [], + "operation": "groupby" + }, + "template_version": { + "aggregations": [], + "operation": "groupby" + }, + "workspace_name": { + "aggregations": [], + "operation": "groupby" + }, + "workspace_owner": { + "aggregations": [], + "operation": "groupby" + }, + "workspace_transition": { + "aggregations": [], + 
"operation": "groupby" + } + } + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "desc": true, + "field": "Value" + } + ] + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": false + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": { + "Value": "Count", + "Value (sum)": "Total", + "status": "Status", + "template_name": "Template Name", + "template_version": "Template Version", + "workspace_name": "Workspace Name", + "workspace_owner": "Workspace Owner", + "workspace_transition": "Workspace Transition" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 15.2 + }, + "id": 37, + "links": [ + { + "title": "Provisioners Dashboard", + "url": "/d/provisionerd/provisioners?${__url_time_range}" + } + ], + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "This table shows a reverse-chronological log of all workspace builds.\n\nThe \"Count\" field shows the count of events which occurred within a minute, grouped by all columns.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "gridPos": { + "h": 10, + "w": 20, + "x": 0, + "y": 22.2 + }, + "id": 7, + "options": { + "dedupStrategy": "exact", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": true, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "editorMode": "code", + "expr": "{namespace=~`(coder|coder)`, logger=~\"(.*runner|terraform|provisioner.*)\"} |~ \"$workspace_name\" | line_format `{{ printf \"[\\033[35m\" }}{{.pod}}{{ printf \"\\033[0m]\\t\" }}{{ __line__ }}`", + "hide": 
false, + "queryType": "range", + "refId": "A" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "editorMode": "code", + "expr": "{namespace=`coder-workspaces`, pod=~\".*($workspace_name).*\"} | line_format `{{ printf \"[\\033[32m\" }}{{.pod}}{{ printf \"\\033[0m]\\t\" }}{{ __line__ }}`", + "hide": false, + "queryType": "range", + "refId": "B" + } + ], + "title": "Logs", + "type": "logs" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 10, + "w": 4, + "x": 20, + "y": 22.2 + }, + "id": 24, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "The logs to the left come both from provisioners and workspace logs.\n\nProvisioner logs matching the name filter are highlighted in magenta, while\nworkspace logs matching the name filter are highlighted in green.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "allValue": "", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(coderd_agents_up,workspace_name)", + "hide": 0, + "includeAll": false, + "label": "Workspace Name Filter", + "multi": false, + "name": "workspace_name", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(coderd_agents_up,workspace_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-12h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Workspace Detail", + "uid": "workspace-detail", + "version": 9, + "weekStart": "" + } +--- +# Source: coder-observability/templates/dashboards/configmap-dashboards-workspaces.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: 
dashboards-workspaces + namespace: coder-observability +data: + workspaces.json: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "description": "", + "gridPos": { + "h": 1.2, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 28, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "**HINT**: use the dropdowns above to filter by specific workspaces and/or templates.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1.2 + }, + "id": 31, + "title": "Resources", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, 
+ "gridPos": { + "h": 8, + "w": 10, + "x": 0, + "y": 2.2 + }, + "id": 33, + "options": { + "legend": { + "calcs": [ + "mean", + "stdDev", + "min", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum by (pod) (rate(container_cpu_usage_seconds_total{namespace=`coder-workspaces`}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 10, + "y": 2.2 + }, + "id": 37, + "options": { + "legend": { + "calcs": [ + "mean", + "stdDev", + "min", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + 
"tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max by (pod) (container_memory_working_set_bytes{namespace=`coder-workspaces`})", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "RAM Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 2.2 + }, + "id": 36, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "The cumulative CPU used per core-second. If a workspace was using a full CPU core, that would be represented as 1 second.\n\nSee the Kubernetes [documentation](https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource/#cpu-units) for more details.\n\nThe total memory used by each workspace container is represented; it is the same metric which the [OOM killer](https://www.kernel.org/doc/gorman/html/understand/understand016.html) uses.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": 
{ + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 0, + "y": 10.2 + }, + "id": 38, + "options": { + "legend": { + "calcs": [ + "sum" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum by (pod) (\n round(increase(kube_pod_container_status_restarts_total{namespace=`coder-workspaces`}[$__interval]))\n) > 0", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Pod Restarts", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + 
"w": 10, + "x": 10, + "y": 10.2 + }, + "id": 39, + "options": { + "legend": { + "calcs": [ + "sum" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum by (pod, reason) (\n count_over_time(kube_pod_container_status_terminated_reason{namespace=`coder-workspaces`}[$__interval])\n)", + "hide": false, + "instant": false, + "legendFormat": "{{pod}}:{{reason}}", + "range": true, + "refId": "B" + } + ], + "title": "Terminations", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 10.2 + }, + "id": 40, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "Pods can be terminated for several reasons:\n- `OOMKilled`: pod exceeded its defined memory limit or was terminated by the OS for using excessive memory (if no limit defined)\n- `Error`: usually attributable to a configuration problem\n- `Evicted`: pod has been evicted from node for overusing resources and will be rescheduled on another node if possible\n\nPod restarts are not necessarily problematic, but they are worth noting.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18.2 + }, + "id": 30, + "panels": [], + "title": "Builds", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "DESTROY" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "STOP" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "START" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 0, + "y": 19.2 + }, + "id": 2, + "interval": "5m", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum by (workspace_transition) (\n (\n # Since new series are created and are initially set to a value of 1, we cannot use \"increase\" (because an increase from to 1 does not yield 1).\n # So we compare the current series to an interval ago to see if we have any new series and then sum the series we find. 
\n (\n coderd_workspace_builds_total{status=\"success\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} - \n coderd_workspace_builds_total{status=\"success\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} offset $__interval\n ) >= 0) \n or coderd_workspace_builds_total{status=\"success\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"}\n) > 0", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Successful Builds by State", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "DESTROY" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "STOP" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "START" + }, + "properties": [ + { + 
"id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 10, + "y": 19.2 + }, + "id": 1, + "interval": "5m", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum by (workspace_transition) (\n (\n # Since new series are created and are initially set to a value of 1, we cannot use \"increase\" (because an increase from to 1 does not yield 1).\n # So we compare the current series to an interval ago to see if we have any new series and then sum the series we find. \n (\n coderd_workspace_builds_total{status=\"failed\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} - \n coderd_workspace_builds_total{status=\"failed\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} offset $__interval\n ) >= 0) \n or coderd_workspace_builds_total{status=\"failed\", workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"}\n) > 0", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Unsuccessful Builds by State", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 19.2 + }, + "id": 34, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "Workspaces \"transition\" between `STOP`, `START`, and `DESTROY` states.\n\nWorkspaces transition between states when a \"build\" is initiated, which is an execution of `terraform` against the chosen template.\n\nUse the \"Build Count\" table to identify workspace owners which may be struggling with template builds, in order 
to proactively reach out to them with assistance.\n\nConsult the [Template documentation](https://coder.com/docs/v2/latest/templates) for more information.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "status" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "mappings", + "value": [ + { + "options": { + "failed": { + "color": "orange", + "index": 1, + "text": "Failure" + }, + "success": { + "color": "green", + "index": 0, + "text": "Success" + } + }, + "type": "value" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Workspace Transition" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "mappings", + "value": [ + { + "options": { + "DESTROY": { + "color": "red", + "index": 0 + }, + "START": { + "color": "blue", + "index": 1 + }, + "STOP": { + "color": "purple", + "index": 2 + } + }, + "type": "value" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 20, + "x": 0, + "y": 27.2 + }, + "id": 6, + "interval": "", + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": true, + "fields": [], + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Time" + } + ] + }, + "pluginVersion": "10.4.0", + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (workspace_name, workspace_owner, status, template_name, template_version, workspace_transition) (\n # Since new series are created and are initially set to a value of 1, we cannot use \"increase\" (because an increase from to 1 does not yield 1).\n # So we compare the current series to an interval ago to see if we have any new series and then sum the series we find. \n ((\n coderd_workspace_builds_total{workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} - \n coderd_workspace_builds_total{workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"} offset $__interval\n ) >= 0) \n or coderd_workspace_builds_total{workspace_name=~\"$workspace_name\", template_name=~\"$template_name\"}\n) > 0", + "format": "table", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Build Log", + "transformations": [ + { + "disabled": true, + "id": "groupBy", + "options": { + "fields": { + "Count": { + "aggregations": [ + "sum" + ], + "operation": "aggregate" + }, + "Status": { + "aggregations": [], + "operation": "groupby" + }, + "Template Name": { + "aggregations": [], + "operation": "groupby" + }, + "Template Version": { + "aggregations": [], + "operation": "groupby" + }, + "Total": { + "aggregations": [ + "sum" + ], + "operation": "aggregate" + }, + "Value": { + "aggregations": [ + "sum" + ], + "operation": "aggregate" + }, + "Workspace Name": { + "aggregations": [], + "operation": "groupby" + }, + "Workspace Ownert": { + "aggregations": [], + "operation": "groupby" + }, + "Workspace Transition": { + "aggregations": [], + "operation": "groupby" + }, + "status": { + "aggregations": [], + "operation": "groupby" + }, + "template_name": { + "aggregations": [], + "operation": "groupby" + }, + "template_version": { + "aggregations": [], + "operation": "groupby" + }, + 
"workspace_name": { + "aggregations": [], + "operation": "groupby" + }, + "workspace_owner": { + "aggregations": [], + "operation": "groupby" + }, + "workspace_transition": { + "aggregations": [], + "operation": "groupby" + } + } + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "desc": true, + "field": "Value" + } + ] + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": false + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": { + "Value": "Count", + "Value (sum)": "Total", + "status": "Status", + "template_name": "Template Name", + "template_version": "Template Version", + "workspace_name": "Workspace Name", + "workspace_owner": "Workspace Owner", + "workspace_transition": "Workspace Transition" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 10, + "w": 4, + "x": 20, + "y": 27.2 + }, + "id": 29, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "This table shows a reverse-chronological log of all workspace builds.\n\nThe \"Count\" field shows the count of events which occurred within a minute, grouped by all columns.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 0, + "y": 37.2 + }, + "id": 8, + "interval": "1h", + "options": { + "displayLabels": [ + "name" + ], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "pie", + 
"reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count by (workspace_owner) (coderd_workspace_latest_build_status{template_name=~\"$template_name\"})", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Workspace by User", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 5, + "y": 37.2 + }, + "id": 9, + "interval": "1h", + "options": { + "displayLabels": [ + "name" + ], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count by (workspace_owner, template_name) (coderd_workspace_latest_build_status{template_name=~\"$template_name\"})", + "instant": true, + "legendFormat": "{{workspace_owner}}:{{template_name}}", + "range": false, + "refId": "A" + } + ], + "title": "Workspace by User/Template", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + 
"mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 10, + "y": 37.2 + }, + "id": 4, + "interval": "1h", + "options": { + "displayLabels": [ + "name" + ], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count by (template_name) (coderd_workspace_latest_build_status{template_name=~\"$template_name\"})", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Template Usage", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 15, + "y": 37.2 + }, + "id": 5, + "interval": "1h", + "options": { + "displayLabels": [], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count by (template_name, template_version) (coderd_workspace_latest_build_status{template_name=~\"$template_name\"})", + "instant": true, + "legendFormat": "{{template_name}}:{{template_version}}", + "range": false, + "refId": 
"A" + } + ], + "title": "Template Version Usage", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 37.2 + }, + "id": 24, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "These charts show the distribution of workspaces and templates.\n\nUse these charts to identify which users have outdated templates, and which templates are the most/least popular in your organisation.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 44.2 + }, + "id": 32, + "panels": [], + "title": "Logs", + "type": "row" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "gridPos": { + "h": 10, + "w": 20, + "x": 0, + "y": 45.2 + }, + "id": 7, + "options": { + "dedupStrategy": "exact", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "editorMode": "code", + "expr": "{namespace=~`(coder|coder)`, logger=~\"(.*runner|terraform|provisioner.*)\"} |~ \"$workspace_name\" or \"$template_name\"", + "queryType": "range", + "refId": "A" + } + ], + "title": "Logs", + "type": "logs" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "gridPos": { + "h": 10, + "w": 4, + "x": 20, + "y": 45.2 + }, + "id": 22, + "links": [ + { + "title": "Provisioners Dashboard", + "url": "/d/provisionerd/provisioners?${__url_time_range}" + } + ], + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "These are the logs produced by the 
[Provisioners](/d/provisionerd/provisioners?${__url_time_range}).\n\nUse the dropdowns at the top to filter the logs down to a specific workspace and/or template.", + "mode": "markdown" + }, + "pluginVersion": "10.4.0", + "transparent": true, + "type": "text" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "allValue": "", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(coderd_workspace_builds_total,workspace_name)", + "hide": 0, + "includeAll": true, + "label": "Workspace Name Filter", + "multi": true, + "name": "workspace_name", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(coderd_workspace_builds_total,workspace_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "allValue": "", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(coderd_workspace_builds_total,template_name)", + "hide": 0, + "includeAll": true, + "label": "Template Name Filter", + "multi": true, + "name": "template_name", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(coderd_workspace_builds_total,template_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-12h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Workspaces", + "uid": "workspaces", + "version": 2, + "weekStart": "" + } +--- +# Source: coder-observability/charts/grafana/templates/pvc.yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: grafana + namespace: 
coder-observability + labels: + app.kubernetes.io/name: grafana + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + finalizers: + - kubernetes.io/pvc-protection +spec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: "10Gi" +--- +# Source: coder-observability/charts/grafana-agent/templates/rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: grafana-agent + labels: + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm +rules: + # Rules which allow discovery.kubernetes to function. + - apiGroups: + - "" + - "discovery.k8s.io" + - "networking.k8s.io" + resources: + - endpoints + - endpointslices + - ingresses + - nodes + - nodes/proxy + - nodes/metrics + - pods + - services + verbs: + - get + - list + - watch + # Rules which allow loki.source.kubernetes and loki.source.podlogs to work. + - apiGroups: + - "" + resources: + - pods + - pods/log + - namespaces + verbs: + - get + - list + - watch + - apiGroups: + - "monitoring.grafana.com" + resources: + - podlogs + verbs: + - get + - list + - watch + # Rules which allow mimir.rules.kubernetes to work. + - apiGroups: ["monitoring.coreos.com"] + resources: + - prometheusrules + verbs: + - get + - list + - watch + - nonResourceURLs: + - /metrics + verbs: + - get + # Rules for prometheus.kubernetes.* + - apiGroups: ["monitoring.coreos.com"] + resources: + - podmonitors + - servicemonitors + - probes + verbs: + - get + - list + - watch + # Rules which allow eventhandler to work. 
+ - apiGroups: + - "" + resources: + - events + verbs: + - get + - list + - watch + # needed for remote.kubernetes.* + - apiGroups: [""] + resources: + - "configmaps" + - "secrets" + verbs: + - get + - list + - watch + # needed for otelcol.processor.k8sattributes + - apiGroups: ["apps"] + resources: ["replicasets"] + verbs: ["get", "list", "watch"] + - apiGroups: ["extensions"] + resources: ["replicasets"] + verbs: ["get", "list", "watch"] +--- +# Source: coder-observability/charts/grafana/templates/clusterrole.yaml +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + labels: + app.kubernetes.io/name: grafana + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + annotations: + prometheus.io/scrape: "true" + name: grafana-clusterrole +rules: [] +--- +# Source: coder-observability/charts/loki/templates/backend/clusterrole.yaml +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + name: loki-clusterrole +rules: + - apiGroups: [""] # "" indicates the core API group + resources: ["configmaps", "secrets"] + verbs: ["get", "watch", "list"] +--- +# Source: coder-observability/charts/prometheus/charts/kube-state-metrics/templates/role.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: coder-observability + name: kube-state-metrics +rules: + - apiGroups: ["certificates.k8s.io"] + resources: + - certificatesigningrequests + verbs: ["list", "watch"] + - apiGroups: [""] + resources: + - configmaps + verbs: ["list", "watch"] + - apiGroups: ["batch"] + resources: + - cronjobs + verbs: ["list", "watch"] + - apiGroups: ["extensions", "apps"] + 
resources: + - daemonsets + verbs: ["list", "watch"] + - apiGroups: ["extensions", "apps"] + resources: + - deployments + verbs: ["list", "watch"] + - apiGroups: [""] + resources: + - endpoints + verbs: ["list", "watch"] + - apiGroups: ["autoscaling"] + resources: + - horizontalpodautoscalers + verbs: ["list", "watch"] + - apiGroups: ["extensions", "networking.k8s.io"] + resources: + - ingresses + verbs: ["list", "watch"] + - apiGroups: ["batch"] + resources: + - jobs + verbs: ["list", "watch"] + - apiGroups: ["coordination.k8s.io"] + resources: + - leases + verbs: ["list", "watch"] + - apiGroups: [""] + resources: + - limitranges + verbs: ["list", "watch"] + - apiGroups: ["admissionregistration.k8s.io"] + resources: + - mutatingwebhookconfigurations + verbs: ["list", "watch"] + - apiGroups: [""] + resources: + - namespaces + verbs: ["list", "watch"] + - apiGroups: ["networking.k8s.io"] + resources: + - networkpolicies + verbs: ["list", "watch"] + - apiGroups: [""] + resources: + - nodes + verbs: ["list", "watch"] + - apiGroups: [""] + resources: + - persistentvolumeclaims + verbs: ["list", "watch"] + - apiGroups: [""] + resources: + - persistentvolumes + verbs: ["list", "watch"] + - apiGroups: ["policy"] + resources: + - poddisruptionbudgets + verbs: ["list", "watch"] + - apiGroups: [""] + resources: + - pods + verbs: ["list", "watch"] + - apiGroups: ["extensions", "apps"] + resources: + - replicasets + verbs: ["list", "watch"] + - apiGroups: [""] + resources: + - replicationcontrollers + verbs: ["list", "watch"] + - apiGroups: [""] + resources: + - resourcequotas + verbs: ["list", "watch"] + - apiGroups: [""] + resources: + - secrets + verbs: ["list", "watch"] + - apiGroups: [""] + resources: + - services + verbs: ["list", "watch"] + - apiGroups: ["apps"] + resources: + - statefulsets + verbs: ["list", "watch"] + - apiGroups: ["storage.k8s.io"] + resources: + - storageclasses + verbs: ["list", "watch"] + - apiGroups: ["admissionregistration.k8s.io"] + resources: 
+ - validatingwebhookconfigurations + verbs: ["list", "watch"] + - apiGroups: ["storage.k8s.io"] + resources: + - volumeattachments + verbs: ["list", "watch"] +--- +# Source: coder-observability/charts/prometheus/templates/clusterrole.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: server + app.kubernetes.io/name: prometheus + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: prometheus + name: prometheus +rules: + - apiGroups: + - "" + resources: + - nodes + - nodes/proxy + - nodes/metrics + - services + - endpoints + - pods + - ingresses + - configmaps + verbs: + - get + - list + - watch + - apiGroups: + - "extensions" + - "networking.k8s.io" + resources: + - ingresses/status + - ingresses + verbs: + - get + - list + - watch + - apiGroups: + - "discovery.k8s.io" + resources: + - endpointslices + verbs: + - get + - list + - watch + - nonResourceURLs: + - "/metrics" + verbs: + - get +--- +# Source: coder-observability/charts/grafana-agent/templates/rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: grafana-agent + labels: + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: grafana-agent +subjects: + - kind: ServiceAccount + name: grafana-agent + namespace: coder-observability +--- +# Source: coder-observability/charts/grafana/templates/clusterrolebinding.yaml +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: grafana-clusterrolebinding + labels: + app.kubernetes.io/name: grafana + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + annotations: + prometheus.io/scrape: "true" +subjects: + - kind: ServiceAccount + name: grafana + namespace: coder-observability +roleRef: + 
kind: ClusterRole + name: grafana-clusterrole + apiGroup: rbac.authorization.k8s.io +--- +# Source: coder-observability/charts/loki/templates/backend/clusterrolebinding.yaml +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: loki-clusterrolebinding + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm +subjects: + - kind: ServiceAccount + name: loki + namespace: coder-observability +roleRef: + kind: ClusterRole + name: loki-clusterrole + apiGroup: rbac.authorization.k8s.io +--- +# Source: coder-observability/charts/prometheus/charts/kube-state-metrics/templates/clusterrolebinding.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: coder-observability + name: kube-state-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kube-state-metrics +subjects: + - kind: ServiceAccount + name: kube-state-metrics + namespace: coder-observability +--- +# Source: coder-observability/charts/prometheus/templates/clusterrolebinding.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/component: server + app.kubernetes.io/name: prometheus + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: prometheus + name: prometheus +subjects: + - kind: ServiceAccount + name: prometheus + namespace: coder-observability +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus +--- +# Source: coder-observability/charts/grafana/templates/role.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: grafana + namespace: coder-observability + labels: + 
app.kubernetes.io/name: grafana + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + annotations: + prometheus.io/scrape: "true" +rules: [] +--- +# Source: coder-observability/charts/grafana/templates/rolebinding.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: grafana + namespace: coder-observability + labels: + app.kubernetes.io/name: grafana + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + annotations: + prometheus.io/scrape: "true" +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: grafana +subjects: + - kind: ServiceAccount + name: grafana + namespace: coder-observability +--- +# Source: coder-observability/charts/grafana-agent/templates/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: grafana-agent + labels: + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/instance: coder-observability + internalTrafficPolicy: Cluster + ports: + - name: http-metrics + port: 80 + targetPort: 80 + protocol: "TCP" +--- +# Source: coder-observability/charts/grafana/templates/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: grafana + namespace: coder-observability + labels: + app.kubernetes.io/name: grafana + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - name: service + port: 80 + protocol: TCP + targetPort: 3000 + selector: + app.kubernetes.io/name: grafana + app.kubernetes.io/instance: coder-observability +--- +# Source: coder-observability/charts/loki/charts/minio/templates/console-service.yaml +apiVersion: v1 +kind: Service +metadata: + name: loki-storage-console + namespace: "coder-observability" + labels: + app: minio + chart: minio-4.0.15 + release: coder-observability + 
heritage: Helm +spec: + type: ClusterIP + ports: + - name: http + port: 9001 + protocol: TCP + targetPort: 9001 + selector: + app: minio + release: coder-observability +--- +# Source: coder-observability/charts/loki/charts/minio/templates/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: loki-storage + namespace: "coder-observability" + labels: + app: minio + chart: minio-4.0.15 + release: coder-observability + heritage: Helm + monitoring: "true" +spec: + type: ClusterIP + ports: + - name: http + port: 9000 + protocol: TCP + targetPort: 9000 + selector: + app: minio + release: coder-observability +--- +# Source: coder-observability/charts/loki/charts/minio/templates/statefulset.yaml +apiVersion: v1 +kind: Service +metadata: + name: loki-storage-svc + namespace: "coder-observability" + labels: + app: minio + chart: minio-4.0.15 + release: "coder-observability" + heritage: "Helm" +spec: + publishNotReadyAddresses: true + clusterIP: None + ports: + - name: http + port: 9000 + protocol: TCP + targetPort: 9000 + selector: + app: minio + release: coder-observability +--- +# Source: coder-observability/charts/loki/templates/backend/query-scheduler-discovery.yaml +apiVersion: v1 +kind: Service +metadata: + name: loki-query-scheduler-discovery + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: backend + prometheus.io/service-monitor: "false" +spec: + type: ClusterIP + clusterIP: None + publishNotReadyAddresses: true + ports: + - name: http-metrics + port: 3100 + targetPort: http-metrics + protocol: TCP + - name: grpc + port: 9095 + targetPort: grpc + protocol: TCP + selector: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: backend +--- +# Source: coder-observability/charts/loki/templates/backend/service-backend-headless.yaml +apiVersion: v1 +kind: Service +metadata: + name: loki-backend-headless 
+ namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: backend + variant: headless + prometheus.io/service-monitor: "false" + annotations: +spec: + type: ClusterIP + clusterIP: None + ports: + - name: http-metrics + port: 3100 + targetPort: http-metrics + protocol: TCP + - name: grpc + port: 9095 + targetPort: grpc + protocol: TCP + selector: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: backend +--- +# Source: coder-observability/charts/loki/templates/backend/service-backend.yaml +apiVersion: v1 +kind: Service +metadata: + name: loki-backend + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: backend + annotations: +spec: + type: ClusterIP + ports: + - name: http-metrics + port: 3100 + targetPort: http-metrics + protocol: TCP + - name: grpc + port: 9095 + targetPort: grpc + protocol: TCP + selector: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: backend +--- +# Source: coder-observability/charts/loki/templates/chunks-cache/service-chunks-cache-headless.yaml +apiVersion: v1 +kind: Service +metadata: + name: loki-chunks-cache + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: "memcached-chunks-cache" + annotations: {} + namespace: "coder-observability" +spec: + type: ClusterIP + clusterIP: None + ports: + - name: memcached-client + port: 11211 + targetPort: 11211 + - name: http-metrics + port: 9150 + targetPort: 9150 + selector: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: "memcached-chunks-cache" +--- +# Source: 
coder-observability/charts/loki/templates/gateway/service-gateway.yaml +apiVersion: v1 +kind: Service +metadata: + name: loki-gateway + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: gateway + annotations: +spec: + type: ClusterIP + ports: + - name: http-metrics + port: 80 + targetPort: http-metrics + protocol: TCP + selector: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: gateway +--- +# Source: coder-observability/charts/loki/templates/loki-canary/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: loki-canary + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: canary + annotations: +spec: + type: ClusterIP + ports: + - name: http-metrics + port: 3500 + targetPort: http-metrics + protocol: TCP + selector: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: canary +--- +# Source: coder-observability/charts/loki/templates/read/service-read-headless.yaml +apiVersion: v1 +kind: Service +metadata: + name: loki-read-headless + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: read + variant: headless + prometheus.io/service-monitor: "false" + annotations: +spec: + type: ClusterIP + clusterIP: None + ports: + - name: http-metrics + port: 3100 + targetPort: http-metrics + protocol: TCP + - name: grpc + port: 9095 + targetPort: grpc + protocol: TCP + appProtocol: tcp + selector: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: read +--- +# Source: 
coder-observability/charts/loki/templates/read/service-read.yaml +apiVersion: v1 +kind: Service +metadata: + name: loki-read + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: read + annotations: +spec: + type: ClusterIP + ports: + - name: http-metrics + port: 3100 + targetPort: http-metrics + protocol: TCP + - name: grpc + port: 9095 + targetPort: grpc + protocol: TCP + selector: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: read +--- +# Source: coder-observability/charts/loki/templates/results-cache/service-results-cache-headless.yaml +apiVersion: v1 +kind: Service +metadata: + name: loki-results-cache + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: "memcached-results-cache" + annotations: {} + namespace: "coder-observability" +spec: + type: ClusterIP + clusterIP: None + ports: + - name: memcached-client + port: 11211 + targetPort: 11211 + - name: http-metrics + port: 9150 + targetPort: 9150 + selector: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: "memcached-results-cache" +--- +# Source: coder-observability/charts/loki/templates/service-memberlist.yaml +apiVersion: v1 +kind: Service +metadata: + name: loki-memberlist + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + clusterIP: None + ports: + - name: tcp + port: 7946 + targetPort: http-memberlist + protocol: TCP + selector: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/part-of: memberlist +--- +# Source: 
coder-observability/charts/loki/templates/write/service-write-headless.yaml +apiVersion: v1 +kind: Service +metadata: + name: loki-write-headless + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: write + variant: headless + prometheus.io/service-monitor: "false" + annotations: +spec: + type: ClusterIP + clusterIP: None + ports: + - name: http-metrics + port: 3100 + targetPort: http-metrics + protocol: TCP + - name: grpc + port: 9095 + targetPort: grpc + protocol: TCP + appProtocol: tcp + selector: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: write +--- +# Source: coder-observability/charts/loki/templates/write/service-write.yaml +apiVersion: v1 +kind: Service +metadata: + name: loki-write + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: write + annotations: +spec: + type: ClusterIP + ports: + - name: http-metrics + port: 3100 + targetPort: http-metrics + protocol: TCP + - name: grpc + port: 9095 + targetPort: grpc + protocol: TCP + selector: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: write +--- +# Source: coder-observability/charts/prometheus/charts/alertmanager/templates/services.yaml +apiVersion: v1 +kind: Service +metadata: + name: alertmanager + labels: + app.kubernetes.io/name: alertmanager + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + namespace: coder-observability +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + selector: + app.kubernetes.io/name: alertmanager + app.kubernetes.io/instance: coder-observability +--- +# Source: 
coder-observability/charts/prometheus/charts/alertmanager/templates/services.yaml +apiVersion: v1 +kind: Service +metadata: + name: alertmanager-headless + labels: + app.kubernetes.io/name: alertmanager + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + namespace: coder-observability +spec: + clusterIP: None + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + selector: + app.kubernetes.io/name: alertmanager + app.kubernetes.io/instance: coder-observability +--- +# Source: coder-observability/charts/prometheus/charts/kube-state-metrics/templates/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: kube-state-metrics + namespace: coder-observability + labels: + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: coder-observability + annotations: + prometheus.io/scrape: 'true' +spec: + type: "ClusterIP" + ports: + - name: "http" + protocol: TCP + port: 8080 + targetPort: 8080 + selector: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: coder-observability +--- +# Source: coder-observability/charts/prometheus/charts/prometheus-node-exporter/templates/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: node-exporter + namespace: coder-observability + labels: + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: prometheus-node-exporter + app.kubernetes.io/name: prometheus-node-exporter + app.kubernetes.io/instance: coder-observability + annotations: + prometheus.io/scrape: "true" +spec: + type: ClusterIP + ports: + - port: 9100 + targetPort: 9100 + protocol: TCP + name: metrics + selector: + app.kubernetes.io/name: prometheus-node-exporter + app.kubernetes.io/instance: coder-observability +--- +# Source: coder-observability/charts/prometheus/templates/headless-svc.yaml 
+apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: server + app.kubernetes.io/name: prometheus + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: prometheus + name: prometheus-headless + namespace: coder-observability +spec: + clusterIP: None + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 9090 + selector: + app.kubernetes.io/component: server + app.kubernetes.io/name: prometheus + app.kubernetes.io/instance: coder-observability +--- +# Source: coder-observability/charts/prometheus/templates/service.yaml +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: server + app.kubernetes.io/name: prometheus + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: prometheus + name: prometheus + namespace: coder-observability +spec: + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 9090 + selector: + app.kubernetes.io/component: server + app.kubernetes.io/name: prometheus + app.kubernetes.io/instance: coder-observability + sessionAffinity: None + type: "ClusterIP" +--- +# Source: coder-observability/templates/service-runbook-viewer.yaml +apiVersion: v1 +kind: Service +metadata: + name: runbook-viewer +spec: + ports: + - port: 80 + targetPort: 3000 + protocol: TCP + selector: + app: runbook-viewer +--- +# Source: coder-observability/charts/grafana-agent/templates/controllers/daemonset.yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: grafana-agent + labels: + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm +spec: + minReadySeconds: 10 + selector: + matchLabels: + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/instance: coder-observability + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: grafana-agent + prometheus.io/scrape: "true" 
+ labels: + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/instance: coder-observability + spec: + serviceAccountName: grafana-agent + containers: + - name: grafana-agent + imagePullPolicy: IfNotPresent + args: + - run + - /etc/agent/config.river + - --storage.path=/tmp/agent + - --server.http.listen-addr=0.0.0.0:80 + - --server.http.ui-path-prefix=/ + - --disable-reporting=true + env: + - name: AGENT_MODE + value: flow + - name: AGENT_DEPLOY_MODE + value: "helm" + - name: HOSTNAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + ports: + - containerPort: 80 + name: http-metrics + readinessProbe: + httpGet: + path: /-/ready + port: 80 + scheme: HTTP + initialDelaySeconds: 10 + timeoutSeconds: 1 + volumeMounts: + - name: config + mountPath: /etc/agent + - name: varlog + mountPath: /var/log + readOnly: true + - name: dockercontainers + mountPath: /var/lib/docker/containers + readOnly: true + - name: config-reloader + args: + - --volume-dir=/etc/agent + - --webhook-url=http://localhost:80/-/reload + volumeMounts: + - name: config + mountPath: /etc/agent + resources: + requests: + cpu: 1m + memory: 5Mi + dnsPolicy: ClusterFirst + volumes: + - name: config + configMap: + name: collector-config + - name: varlog + hostPath: + path: /var/log + - name: dockercontainers + hostPath: + path: /var/lib/docker/containers +--- +# Source: coder-observability/charts/loki/templates/loki-canary/daemonset.yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: loki-canary + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: canary +spec: + selector: + matchLabels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: canary + updateStrategy: + rollingUpdate: + maxUnavailable: 1 + type: RollingUpdate + template: + metadata: + annotations: + prometheus.io/scrape: "true" + 
labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: canary + spec: + serviceAccountName: loki-canary + securityContext: + fsGroup: 10001 + runAsGroup: 10001 + runAsNonRoot: true + runAsUser: 10001 + containers: + - name: loki-canary + imagePullPolicy: IfNotPresent + args: + - -addr=loki-gateway.coder-observability.svc.cluster.local.:80 + - -labelname=pod + - -labelvalue=$(POD_NAME) + - -push=true + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + volumeMounts: + ports: + - name: http-metrics + containerPort: 3500 + protocol: TCP + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + readinessProbe: + httpGet: + path: /metrics + port: http-metrics + initialDelaySeconds: 15 + timeoutSeconds: 1 + volumes: +--- +# Source: coder-observability/charts/prometheus/charts/prometheus-node-exporter/templates/daemonset.yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: node-exporter + namespace: coder-observability + labels: + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: prometheus-node-exporter + app.kubernetes.io/name: prometheus-node-exporter + app.kubernetes.io/instance: coder-observability +spec: + selector: + matchLabels: + app.kubernetes.io/name: prometheus-node-exporter + app.kubernetes.io/instance: coder-observability + revisionHistoryLimit: 10 + updateStrategy: + rollingUpdate: + maxUnavailable: 1 + type: RollingUpdate + template: + metadata: + annotations: + cluster-autoscaler.kubernetes.io/safe-to-evict: "true" + prometheus.io/scrape: "true" + labels: + helm.sh/chart: prometheus-node-exporter-4.37.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: prometheus-node-exporter + app.kubernetes.io/name: prometheus-node-exporter + app.kubernetes.io/instance: coder-observability + 
app.kubernetes.io/version: "1.8.1" + spec: + automountServiceAccountToken: false + securityContext: + fsGroup: 65534 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + serviceAccountName: node-exporter + containers: + - name: node-exporter + imagePullPolicy: IfNotPresent + args: + - --path.procfs=/host/proc + - --path.sysfs=/host/sys + - --path.rootfs=/host/root + - --path.udev.data=/host/root/run/udev/data + - --web.listen-address=[$(HOST_IP)]:9100 + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + env: + - name: HOST_IP + value: 0.0.0.0 + ports: + - name: metrics + containerPort: 9100 + protocol: TCP + livenessProbe: + failureThreshold: 3 + httpGet: + httpHeaders: + path: / + port: 9100 + scheme: HTTP + initialDelaySeconds: 0 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + readinessProbe: + failureThreshold: 3 + httpGet: + httpHeaders: + path: / + port: 9100 + scheme: HTTP + initialDelaySeconds: 0 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + volumeMounts: + - name: proc + mountPath: /host/proc + readOnly: true + - name: sys + mountPath: /host/sys + readOnly: true + - name: root + mountPath: /host/root + mountPropagation: HostToContainer + readOnly: true + hostNetwork: true + hostPID: true + nodeSelector: + kubernetes.io/os: linux + tolerations: + - effect: NoSchedule + operator: Exists + volumes: + - name: proc + hostPath: + path: /proc + - name: sys + hostPath: + path: /sys + - name: root + hostPath: + path: / +--- +# Source: coder-observability/charts/loki/templates/gateway/deployment-gateway-nginx.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: loki-gateway + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: gateway +spec: + replicas: 1 + strategy: + type: RollingUpdate + revisionHistoryLimit: 10 + selector: + matchLabels: + 
app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: gateway + template: + metadata: + annotations: + checksum/config: bc7add19cdc0df1566dec1bf8f9421082357d4393124d6ea2df28d7e5888cc8a + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: gateway + spec: + serviceAccountName: loki + enableServiceLinks: true + securityContext: + fsGroup: 101 + runAsGroup: 101 + runAsNonRoot: true + runAsUser: 101 + terminationGracePeriodSeconds: 30 + containers: + - name: nginx + imagePullPolicy: IfNotPresent + ports: + - name: http-metrics + containerPort: 8080 + protocol: TCP + readinessProbe: + httpGet: + path: / + port: http-metrics + initialDelaySeconds: 15 + timeoutSeconds: 1 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + volumeMounts: + - name: config + mountPath: /etc/nginx + - name: tmp + mountPath: /tmp + - name: docker-entrypoint-d-override + mountPath: /docker-entrypoint.d + resources: {} + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app.kubernetes.io/component: gateway + topologyKey: kubernetes.io/hostname + volumes: + - name: config + configMap: + name: loki-gateway + - name: tmp + emptyDir: {} + - name: docker-entrypoint-d-override + emptyDir: {} +--- +# Source: coder-observability/charts/loki/templates/read/deployment-read.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: loki-read + namespace: coder-observability + labels: + app.kubernetes.io/part-of: memberlist + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: read +spec: + replicas: 1 + strategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app.kubernetes.io/name: loki + 
app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: read + template: + metadata: + annotations: + checksum/config: 4dbe50185304244ab527314b7723d048ea4544f97d0a4d8e0192863861811005 + prometheus.io/scrape: "true" + labels: + app.kubernetes.io/part-of: memberlist + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: read + spec: + serviceAccountName: loki + automountServiceAccountToken: true + securityContext: + fsGroup: 10001 + runAsGroup: 10001 + runAsNonRoot: true + runAsUser: 10001 + terminationGracePeriodSeconds: 30 + containers: + - name: loki + imagePullPolicy: IfNotPresent + args: + - -config.file=/etc/loki/config/config.yaml + - -target=read + - -legacy-read-mode=false + - -common.compactor-grpc-address=loki-backend.coder-observability.svc.cluster.local:9095 + ports: + - name: http-metrics + containerPort: 3100 + protocol: TCP + - name: grpc + containerPort: 9095 + protocol: TCP + - name: http-memberlist + containerPort: 7946 + protocol: TCP + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + readinessProbe: + httpGet: + path: /ready + port: http-metrics + initialDelaySeconds: 30 + timeoutSeconds: 1 + volumeMounts: + - name: config + mountPath: /etc/loki/config + - name: runtime-config + mountPath: /etc/loki/runtime-config + - name: tmp + mountPath: /tmp + - name: data + mountPath: /var/loki + resources: {} + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app.kubernetes.io/component: read + topologyKey: kubernetes.io/hostname + volumes: + - name: tmp + emptyDir: {} + - name: data + emptyDir: {} + - name: config + configMap: + name: loki + items: + - key: "config.yaml" + path: "config.yaml" + - name: runtime-config + configMap: + name: loki-runtime +--- +# Source: 
coder-observability/charts/prometheus/charts/kube-state-metrics/templates/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: kube-state-metrics + namespace: coder-observability + labels: + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: coder-observability +spec: + selector: + matchLabels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: coder-observability + replicas: 1 + strategy: + type: RollingUpdate + revisionHistoryLimit: 10 + template: + metadata: + labels: + helm.sh/chart: kube-state-metrics-5.21.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/version: "2.12.0" + annotations: + prometheus.io/scrape: "true" + spec: + automountServiceAccountToken: true + hostNetwork: false + serviceAccountName: kube-state-metrics + securityContext: + fsGroup: 65534 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + seccompProfile: + type: RuntimeDefault + containers: + - name: kube-state-metrics + args: + - --port=8080 + - --resources=certificatesigningrequests,configmaps,cronjobs,daemonsets,deployments,endpoints,horizontalpodautoscalers,ingresses,jobs,leases,limitranges,mutatingwebhookconfigurations,namespaces,networkpolicies,nodes,persistentvolumeclaims,persistentvolumes,poddisruptionbudgets,pods,replicasets,replicationcontrollers,resourcequotas,secrets,services,statefulsets,storageclasses,validatingwebhookconfigurations,volumeattachments + imagePullPolicy: IfNotPresent + ports: + - containerPort: 8080 + name: "http" + livenessProbe: + failureThreshold: 3 + httpGet: + httpHeaders: + path: /healthz + port: 8080 + scheme: HTTP + initialDelaySeconds: 5 + periodSeconds: 10 + 
successThreshold: 1 + timeoutSeconds: 5 + readinessProbe: + failureThreshold: 3 + httpGet: + httpHeaders: + path: / + port: 8080 + scheme: HTTP + initialDelaySeconds: 5 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 5 + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true +--- +# Source: coder-observability/charts/grafana/templates/statefulset.yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: grafana + namespace: coder-observability + labels: + app.kubernetes.io/name: grafana + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + annotations: + prometheus.io/scrape: "true" +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: grafana + app.kubernetes.io/instance: coder-observability + serviceName: grafana-headless + template: + metadata: + labels: + app.kubernetes.io/name: grafana + app.kubernetes.io/instance: coder-observability + annotations: + checksum/config: 2828a490315379b00f2116ebe6a20dd3ca9a4d5ce5839f037c1eb0a4501ecb18 + checksum/dashboards-json-config: 010b57348b6dd1f09007330c03d22a0570022534712646511cad39a9e3cb4bb7 + checksum/sc-dashboard-provider-config: 01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b + kubectl.kubernetes.io/default-container: grafana + spec: + serviceAccountName: grafana + automountServiceAccountToken: true + securityContext: + fsGroup: 472 + runAsGroup: 472 + runAsNonRoot: true + runAsUser: 472 + initContainers: + - name: init-chown-data + image: "docker.io/library/busybox:1.31.1" + imagePullPolicy: IfNotPresent + securityContext: + capabilities: + add: + - CHOWN + runAsNonRoot: false + runAsUser: 0 + seccompProfile: + type: RuntimeDefault + command: + - chown + - -R + - 472:472 + - /var/lib/grafana + volumeMounts: + - name: storage + mountPath: "/var/lib/grafana" + - name: download-dashboards + image: "docker.io/curlimages/curl:7.85.0" + imagePullPolicy: 
IfNotPresent + command: ["/bin/sh"] + args: ["-c", "mkdir -p /var/lib/grafana/dashboards/default && /bin/sh -x /etc/grafana/download_dashboards.sh"] + env: + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + volumeMounts: + - name: config + mountPath: "/etc/grafana/download_dashboards.sh" + subPath: download_dashboards.sh + - name: storage + mountPath: "/var/lib/grafana" + enableServiceLinks: true + containers: + - name: grafana + imagePullPolicy: IfNotPresent + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + volumeMounts: + - name: config + mountPath: "/etc/grafana/grafana.ini" + subPath: grafana.ini + - name: dashboards-status + mountPath: /var/lib/grafana/dashboards/coder/0 + subPath: + readOnly: false + - name: dashboards-coderd + mountPath: /var/lib/grafana/dashboards/coder/1 + subPath: + readOnly: false + - name: dashboards-provisionerd + mountPath: /var/lib/grafana/dashboards/coder/2 + subPath: + readOnly: false + - name: dashboards-workspaces + mountPath: /var/lib/grafana/dashboards/coder/3 + subPath: + readOnly: false + - name: dashboards-workspace-detail + mountPath: /var/lib/grafana/dashboards/coder/4 + subPath: + readOnly: false + - name: dashboards-prebuilds + mountPath: /var/lib/grafana/dashboards/coder/5 + subPath: + readOnly: false + - name: storage + mountPath: "/var/lib/grafana" + - name: config + mountPath: "/etc/grafana/provisioning/datasources/datasources.yaml" + subPath: "datasources.yaml" + - name: config + mountPath: "/etc/grafana/provisioning/dashboards/coder.yaml" + subPath: "coder.yaml" + - name: config + mountPath: "/etc/grafana/provisioning/dashboards/infra.yaml" + subPath: "infra.yaml" + - name: config + mountPath: "/etc/grafana/provisioning/dashboards/sidecar.yaml" + subPath: "sidecar.yaml" + ports: + - name: grafana + containerPort: 3000 + protocol: TCP + - name: gossip-tcp + 
containerPort: 9094 + protocol: TCP + - name: gossip-udp + containerPort: 9094 + protocol: UDP + env: + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: GF_PATHS_DATA + value: /var/lib/grafana/ + - name: GF_PATHS_LOGS + value: /var/log/grafana + - name: GF_PATHS_PLUGINS + value: /var/lib/grafana/plugins + - name: GF_PATHS_PROVISIONING + value: /etc/grafana/provisioning + - name: "GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION" + value: "true" + livenessProbe: + failureThreshold: 10 + httpGet: + path: /api/health + port: 3000 + initialDelaySeconds: 60 + timeoutSeconds: 30 + readinessProbe: + httpGet: + path: /api/health + port: 3000 + volumes: + - name: config + configMap: + name: grafana + - name: dashboards-status + configMap: + name: dashboards-status + - name: dashboards-coderd + configMap: + name: dashboards-coderd + - name: dashboards-provisionerd + configMap: + name: dashboards-provisionerd + - name: dashboards-workspaces + configMap: + name: dashboards-workspaces + - name: dashboards-workspace-detail + configMap: + name: dashboards-workspace-detail + - name: dashboards-prebuilds + configMap: + name: dashboards-prebuilds + - name: dashboards-infra + configMap: + name: grafana-dashboards-infra + - name: storage + persistentVolumeClaim: + claimName: grafana + volumeClaimTemplates: + - metadata: + name: storage + spec: + accessModes: [ReadWriteOnce] + storageClassName: + resources: + requests: + storage: 10Gi +--- +# Source: coder-observability/charts/loki/charts/minio/templates/statefulset.yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: loki-storage + namespace: "coder-observability" + labels: + app: minio + chart: minio-4.0.15 + release: coder-observability + heritage: Helm +spec: + updateStrategy: + type: RollingUpdate + podManagementPolicy: "Parallel" + serviceName: loki-storage-svc + replicas: 1 + selector: + matchLabels: + app: minio + release: coder-observability + template: + metadata: + name: loki-storage + labels: 
+ app: minio + release: coder-observability + app.kubernetes.io/name: loki-storage + annotations: + checksum/secrets: 982fb92f094edb3a3a156ec880923b557a774f885bf38c7d14d92c08f1d1257d + checksum/config: 876a07a82a63058ee3cc32fd1988af1e51b44e1d25825387ad4ecfdde9199417 + prometheus.io/path: /minio/v2/metrics/cluster + prometheus.io/scrape: "true" + spec: + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + fsGroupChangePolicy: OnRootMismatch + serviceAccountName: minio-sa + containers: + - name: minio + imagePullPolicy: IfNotPresent + command: ["/bin/sh", "-ce", "/usr/bin/docker-entrypoint.sh minio server http://loki-storage-{0...0}.loki-storage-svc.coder-observability.svc.cluster.local/export-{0...1} -S /etc/minio/certs/ --address :9000 --console-address :9001"] + volumeMounts: + - name: export-0 + mountPath: /export-0 + - name: export-1 + mountPath: /export-1 + ports: + - name: http + containerPort: 9000 + - name: http-console + containerPort: 9001 + env: + - name: MINIO_ROOT_USER + valueFrom: + secretKeyRef: + name: loki-storage + key: rootUser + - name: MINIO_ROOT_PASSWORD + valueFrom: + secretKeyRef: + name: loki-storage + key: rootPassword + - name: MINIO_PROMETHEUS_AUTH_TYPE + value: "public" + resources: + requests: + cpu: 100m + memory: 128Mi + volumes: + - name: minio-user + secret: + secretName: loki-storage + volumeClaimTemplates: + - metadata: + name: export-0 + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 5Gi + - metadata: + name: export-1 + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 5Gi +--- +# Source: coder-observability/charts/loki/templates/backend/statefulset-backend.yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: loki-backend + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: backend + app.kubernetes.io/part-of: 
memberlist +spec: + replicas: 1 + podManagementPolicy: Parallel + updateStrategy: + rollingUpdate: + partition: 0 + serviceName: loki-backend-headless + revisionHistoryLimit: 10 + persistentVolumeClaimRetentionPolicy: + whenDeleted: Delete + whenScaled: Delete + selector: + matchLabels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: backend + template: + metadata: + annotations: + checksum/config: 4dbe50185304244ab527314b7723d048ea4544f97d0a4d8e0192863861811005 + prometheus.io/scrape: "true" + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: backend + app.kubernetes.io/part-of: memberlist + spec: + serviceAccountName: loki + automountServiceAccountToken: true + securityContext: + fsGroup: 10001 + runAsGroup: 10001 + runAsNonRoot: true + runAsUser: 10001 + terminationGracePeriodSeconds: 300 + containers: + - name: loki-sc-rules + imagePullPolicy: IfNotPresent + env: + - name: METHOD + value: WATCH + - name: LABEL + value: "loki_rule" + - name: FOLDER + value: "/rules/fake" + - name: RESOURCE + value: "both" + - name: WATCH_SERVER_TIMEOUT + value: "60" + - name: WATCH_CLIENT_TIMEOUT + value: "60" + - name: LOG_LEVEL + value: "DEBUG" + volumeMounts: + - name: sc-rules-volume + mountPath: "/rules/fake" + - name: loki + imagePullPolicy: IfNotPresent + args: + - -config.file=/etc/loki/config/config.yaml + - -target=backend + - -legacy-read-mode=false + - -log.level=debug + ports: + - name: http-metrics + containerPort: 3100 + protocol: TCP + - name: grpc + containerPort: 9095 + protocol: TCP + - name: http-memberlist + containerPort: 7946 + protocol: TCP + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + readinessProbe: + httpGet: + path: /ready + port: http-metrics + initialDelaySeconds: 30 + timeoutSeconds: 1 + volumeMounts: + - name: config + mountPath: 
/etc/loki/config + - name: runtime-config + mountPath: /etc/loki/runtime-config + - name: tmp + mountPath: /tmp + - name: data + mountPath: /var/loki + - name: sc-rules-volume + mountPath: "/rules/fake" + - mountPath: /var/loki-ruler-wal + name: ruler-wal + resources: {} + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app.kubernetes.io/component: backend + topologyKey: kubernetes.io/hostname + volumes: + - name: tmp + emptyDir: {} + - name: config + configMap: + name: loki + items: + - key: "config.yaml" + path: "config.yaml" + - name: runtime-config + configMap: + name: loki-runtime + - name: sc-rules-volume + emptyDir: {} + - emptyDir: {} + name: ruler-wal + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: "10Gi" +--- +# Source: coder-observability/charts/loki/templates/chunks-cache/statefulset-chunks-cache.yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: loki-chunks-cache + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: "memcached-chunks-cache" + name: "memcached-chunks-cache" + annotations: {} + namespace: "coder-observability" +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: "memcached-chunks-cache" + name: "memcached-chunks-cache" + updateStrategy: + type: RollingUpdate + serviceName: loki-chunks-cache + template: + metadata: + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: "memcached-chunks-cache" + name: "memcached-chunks-cache" + annotations: + spec: + serviceAccountName: loki + securityContext: {} + initContainers: [] + 
nodeSelector: {} + affinity: {} + topologySpreadConstraints: [] + tolerations: [] + terminationGracePeriodSeconds: 60 + containers: + - name: memcached + imagePullPolicy: IfNotPresent + resources: + limits: + memory: 1229Mi + requests: + cpu: 500m + memory: 1229Mi + ports: + - containerPort: 11211 + name: client + args: + - -m 1024 + - --extended=modern,track_sizes + - -I 5m + - -c 16384 + - -v + - -u 11211 + env: + envFrom: + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + - name: exporter + imagePullPolicy: IfNotPresent + ports: + - containerPort: 9150 + name: http-metrics + args: + - "--memcached.address=localhost:11211" + - "--web.listen-address=0.0.0.0:9150" + resources: + limits: {} + requests: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true +--- +# Source: coder-observability/charts/loki/templates/results-cache/statefulset-results-cache.yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: loki-results-cache + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: "memcached-results-cache" + name: "memcached-results-cache" + annotations: {} + namespace: "coder-observability" +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: "memcached-results-cache" + name: "memcached-results-cache" + updateStrategy: + type: RollingUpdate + serviceName: loki-results-cache + template: + metadata: + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: "memcached-results-cache" + name: "memcached-results-cache" + annotations: + spec: + serviceAccountName: loki + securityContext: {} + initContainers: [] + nodeSelector: {} + 
affinity: {} + topologySpreadConstraints: [] + tolerations: [] + terminationGracePeriodSeconds: 60 + containers: + - name: memcached + imagePullPolicy: IfNotPresent + resources: + limits: + memory: 1229Mi + requests: + cpu: 500m + memory: 1229Mi + ports: + - containerPort: 11211 + name: client + args: + - -m 1024 + - --extended=modern,track_sizes + - -I 5m + - -c 16384 + - -v + - -u 11211 + env: + envFrom: + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + - name: exporter + imagePullPolicy: IfNotPresent + ports: + - containerPort: 9150 + name: http-metrics + args: + - "--memcached.address=localhost:11211" + - "--web.listen-address=0.0.0.0:9150" + resources: + limits: {} + requests: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true +--- +# Source: coder-observability/charts/loki/templates/write/statefulset-write.yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: loki-write + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: write + app.kubernetes.io/part-of: memberlist +spec: + replicas: 1 + podManagementPolicy: Parallel + updateStrategy: + rollingUpdate: + partition: 0 + serviceName: loki-write-headless + revisionHistoryLimit: 10 + selector: + matchLabels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: write + template: + metadata: + annotations: + checksum/config: 4dbe50185304244ab527314b7723d048ea4544f97d0a4d8e0192863861811005 + prometheus.io/scrape: "true" + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/component: write + app.kubernetes.io/part-of: memberlist + spec: + serviceAccountName: loki + automountServiceAccountToken: true + enableServiceLinks: 
true + securityContext: + fsGroup: 10001 + runAsGroup: 10001 + runAsNonRoot: true + runAsUser: 10001 + terminationGracePeriodSeconds: 300 + containers: + - name: loki + imagePullPolicy: IfNotPresent + args: + - -config.file=/etc/loki/config/config.yaml + - -target=write + - -log.level=debug + ports: + - name: http-metrics + containerPort: 3100 + protocol: TCP + - name: grpc + containerPort: 9095 + protocol: TCP + - name: http-memberlist + containerPort: 7946 + protocol: TCP + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + readinessProbe: + httpGet: + path: /ready + port: http-metrics + initialDelaySeconds: 30 + timeoutSeconds: 1 + volumeMounts: + - name: config + mountPath: /etc/loki/config + - name: runtime-config + mountPath: /etc/loki/runtime-config + - name: data + mountPath: /var/loki + resources: {} + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app.kubernetes.io/component: write + topologyKey: kubernetes.io/hostname + volumes: + - name: config + configMap: + name: loki + items: + - key: "config.yaml" + path: "config.yaml" + - name: runtime-config + configMap: + name: loki-runtime + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: "10Gi" +--- +# Source: coder-observability/charts/prometheus/charts/alertmanager/templates/statefulset.yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: alertmanager + labels: + app.kubernetes.io/name: alertmanager + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + namespace: coder-observability +spec: + replicas: 1 + minReadySeconds: 0 + revisionHistoryLimit: 10 + selector: + matchLabels: + app.kubernetes.io/name: alertmanager + app.kubernetes.io/instance: coder-observability + serviceName: alertmanager-headless + 
template: + metadata: + labels: + app.kubernetes.io/name: alertmanager + app.kubernetes.io/instance: coder-observability + annotations: + checksum/config: 490f47b0d70495b76347dac06d9734b3074df82e0cc70a914ab7859d725f850b + prometheus.io/scrape: "true" + spec: + automountServiceAccountToken: true + serviceAccountName: alertmanager + securityContext: + fsGroup: 65534 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + containers: + - name: alertmanager + securityContext: + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + imagePullPolicy: IfNotPresent + env: + - name: POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + args: + - --storage.path=/alertmanager + - --config.file=/etc/alertmanager/alertmanager.yml + ports: + - name: http + containerPort: 9093 + protocol: TCP + livenessProbe: + httpGet: + path: / + port: http + readinessProbe: + httpGet: + path: / + port: http + resources: {} + volumeMounts: + - name: config + mountPath: /etc/alertmanager + - name: storage + mountPath: /alertmanager + volumes: + - name: config + configMap: + name: alertmanager + volumeClaimTemplates: + - metadata: + name: storage + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 2Gi +--- +# Source: coder-observability/charts/prometheus/templates/sts.yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + app.kubernetes.io/component: server + app.kubernetes.io/name: prometheus + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: prometheus + name: prometheus + namespace: coder-observability +spec: + persistentVolumeClaimRetentionPolicy: + whenDeleted: Retain + whenScaled: Retain + serviceName: prometheus-headless + selector: + matchLabels: + app.kubernetes.io/component: server + app.kubernetes.io/name: prometheus + app.kubernetes.io/instance: coder-observability + replicas: 1 + revisionHistoryLimit: 10 + podManagementPolicy: OrderedReady + 
template: + metadata: + annotations: + prometheus.io/scrape: "true" + labels: + app.kubernetes.io/component: server + app.kubernetes.io/name: prometheus + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/version: v2.53.1 + helm.sh/chart: prometheus-25.24.2 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: prometheus + spec: + enableServiceLinks: true + serviceAccountName: prometheus + containers: + - name: prometheus-server-configmap-reload + imagePullPolicy: "IfNotPresent" + args: + - --watched-dir=/etc/config + - --listen-address=0.0.0.0:9091 + - --reload-url=http://127.0.0.1:9090/-/reload + - --log-level=all + - --watch-interval=15s + ports: + - containerPort: 9091 + name: metrics + livenessProbe: + httpGet: + path: /healthz + port: metrics + scheme: HTTP + initialDelaySeconds: 2 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /healthz + port: metrics + scheme: HTTP + periodSeconds: 10 + startupProbe: + httpGet: + path: /healthz + port: metrics + scheme: HTTP + periodSeconds: 10 + volumeMounts: + - name: config-volume + mountPath: /etc/config + readOnly: true + - name: configmap-reload-alerts + mountPath: /etc/config/alerts + subPath: + readOnly: + - name: prometheus-server + imagePullPolicy: "IfNotPresent" + args: + - --storage.tsdb.retention.time=15d + - --storage.tsdb.retention.size=10GB + - --config.file=/etc/config/prometheus.yml + - --storage.tsdb.path=/data + - --web.console.libraries=/etc/prometheus/console_libraries + - --web.console.templates=/etc/prometheus/consoles + - --web.enable-lifecycle + - --enable-feature=remote-write-receiver + - --log.level=debug + ports: + - containerPort: 9090 + readinessProbe: + httpGet: + path: /-/ready + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 5 + timeoutSeconds: 4 + failureThreshold: 3 + successThreshold: 1 + livenessProbe: + httpGet: + path: /-/healthy + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 15 + timeoutSeconds: 10 
+ failureThreshold: 3 + successThreshold: 1 + volumeMounts: + - name: config-volume + mountPath: /etc/config + - name: storage-volume + mountPath: /data + subPath: "" + - name: server-alerts + mountPath: /etc/config/alerts + subPath: + readOnly: + hostNetwork: false + dnsPolicy: ClusterFirst + securityContext: + fsGroup: 65534 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + terminationGracePeriodSeconds: 300 + volumes: + - name: config-volume + configMap: + name: prometheus + - name: configmap-reload-alerts + configMap: + name: metrics-alerts + - name: server-alerts + configMap: + name: metrics-alerts + - name: alerts + configMap: + name: metrics-alerts + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: storage-volume + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: "12Gi" +--- +# Source: coder-observability/templates/statefulset-postgres-exporter.yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: postgres-exporter + namespace: coder-observability +spec: + selector: + matchLabels: + app: postgres-exporter + serviceName: postgres-exporter + replicas: 1 + template: + metadata: + annotations: + prometheus.io/scrape: 'true' + labels: + app: postgres-exporter + app.kubernetes.io/name: "database-stats" + spec: + containers: + - name: postgres-exporter + args: + - --collector.long_running_transactions + ports: + - containerPort: 9187 + name: exporter + env: + - name: DATA_SOURCE_NAME + value: 'postgresql://coder@localhost:5432/coder?sslmode=disable' + envFrom: + - secretRef: + name: secret-postgres +--- +# Source: coder-observability/templates/statefulset-runbook-viewer.yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: runbook-viewer + namespace: coder-observability +spec: + selector: + matchLabels: + app: runbook-viewer + serviceName: runbook-viewer + replicas: 1 + template: + metadata: + annotations: + checksum/config: 
b0c41033d0385ee3d46488f08e85bcef0d939614dcb99194e0c5913dbf0c2c33 + labels: + app: runbook-viewer + spec: + containers: + - name: madness + ports: + - containerPort: 3000 + name: madness + args: + - server + volumeMounts: + - mountPath: /docs/ + name: runbooks + volumes: + - name: runbooks + configMap: + name: runbooks +--- +# Source: coder-observability/templates/statefulset-sql-exporter.yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: sql-exporter + namespace: coder-observability +spec: + selector: + matchLabels: + app: sql-exporter + serviceName: sql-exporter + replicas: 1 + template: + metadata: + annotations: + prometheus.io/scrape: 'true' + checksum/config: 71bb9e7579b6e138ae28c623aa29d72025be00387da6c1b8dd5aa168c96ca1e0 + labels: + app: sql-exporter + app.kubernetes.io/name: "database-stats" + spec: + containers: + - name: sql-exporter + args: + - -config.file=/cfg/config.yaml + ports: + - containerPort: 9399 + name: exporter + volumeMounts: + - mountPath: /cfg/ + name: config + envFrom: + - secretRef: + name: secret-postgres + volumes: + - name: config + configMap: + name: sql-exporter-config +--- +# Source: coder-observability/charts/loki/templates/tests/test-canary.yaml +apiVersion: v1 +kind: Pod +metadata: + name: "loki-helm-test" + namespace: coder-observability + labels: + app.kubernetes.io/name: loki + app.kubernetes.io/instance: coder-observability + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: helm-test + annotations: + "helm.sh/hook": test +spec: + containers: + - name: loki-helm-test + image: docker.io/grafana/loki-helm-test:ewelch-distributed-helm-chart-17db5ee + env: + - name: CANARY_SERVICE_ADDRESS + value: "http://loki-canary:3500/metrics" + - name: CANARY_PROMETHEUS_ADDRESS + value: "" + - name: CANARY_TEST_TIMEOUT + value: "1m" + args: + - -test.v + restartPolicy: Never +--- +# Source: coder-observability/charts/loki/charts/minio/templates/post-install-create-bucket-job.yaml +apiVersion: batch/v1 +kind: Job 
+metadata: + name: loki-storage-make-bucket-job + namespace: "coder-observability" + labels: + app: minio-make-bucket-job + chart: minio-4.0.15 + release: coder-observability + heritage: Helm + annotations: + "helm.sh/hook": post-install,post-upgrade + "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation +spec: + template: + metadata: + labels: + app: minio-job + release: coder-observability + app.kubernetes.io/name: loki-storage + spec: + restartPolicy: OnFailure + volumes: + - name: minio-configuration + projected: + sources: + - configMap: + name: loki-storage + - secret: + name: loki-storage + serviceAccountName: minio-sa + containers: + - name: minio-mc + imagePullPolicy: IfNotPresent + command: ["/bin/sh", "/config/initialize"] + env: + - name: MINIO_ENDPOINT + value: loki-storage + - name: MINIO_PORT + value: "9000" + volumeMounts: + - name: minio-configuration + mountPath: /config + resources: + requests: + memory: 128Mi +--- +# Source: coder-observability/charts/loki/charts/minio/templates/post-install-create-user-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: loki-storage-make-user-job + namespace: "coder-observability" + labels: + app: minio-make-user-job + chart: minio-4.0.15 + release: coder-observability + heritage: Helm + annotations: + "helm.sh/hook": post-install,post-upgrade + "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation +spec: + template: + metadata: + labels: + app: minio-job + release: coder-observability + app.kubernetes.io/name: loki-storage + spec: + restartPolicy: OnFailure + volumes: + - name: minio-configuration + projected: + sources: + - configMap: + name: loki-storage + - secret: + name: loki-storage + serviceAccountName: minio-sa + containers: + - name: minio-mc + imagePullPolicy: IfNotPresent + command: ["/bin/sh", "/config/add-user"] + env: + - name: MINIO_ENDPOINT + value: loki-storage + - name: MINIO_PORT + value: "9000" + volumeMounts: + - name: minio-configuration + mountPath: 
/config + resources: + requests: + memory: 128Mi diff --git a/scripts/check-unstaged.sh b/scripts/check-unstaged.sh new file mode 100755 index 0000000..ba39211 --- /dev/null +++ b/scripts/check-unstaged.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +source "$(dirname "${BASH_SOURCE[0]}")/lib.sh" + +check_unstaged \ No newline at end of file diff --git a/scripts/compile.sh b/scripts/compile.sh new file mode 100755 index 0000000..a00ce4f --- /dev/null +++ b/scripts/compile.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +set -euo pipefail + +# check versions +HELM_VERSION=3.17 +YQ_VERSION=4.42 +[[ "$(helm version)" == *v${HELM_VERSION}* ]] || { echo "Expected helm version v${HELM_VERSION} but got $(helm version)" >&2; exit 1; } +[[ "$(yq --version)" == *v${YQ_VERSION}* ]] || { echo "Expected yq version v${YQ_VERSION} but got $(yq --version)" >&2; exit 1; } + +source "$(dirname "${BASH_SOURCE[0]}")/lib.sh" + +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo add grafana https://grafana.github.io/helm-charts +helm --repository-cache /tmp/cache repo update +# Check for unexpected changes. +# Helm dependencies are versioned using ^ which accepts minor & patch changes: +# e.g. ^1.2.3 is equivalent to >= 1.2.3 < 2.0.0 +helm dependency update coder-observability/ +# We *expect* that the versions will change in the rendered template output, so we ignore those, but +# if there are changes to the manifests themselves then we need to fail the build to force manual review. 
+helm template --namespace coder-observability -f coder-observability/values.yaml coder-observability coder-observability/ | \
+  yq e 'del(.spec.template.spec.containers[].image, .metadata.labels."helm.sh/chart", .metadata.labels."app.kubernetes.io/version")' - \
+  > compiled/resources.yaml
+
+check_unstaged "compiled"
\ No newline at end of file
diff --git a/scripts/lib.sh b/scripts/lib.sh
new file mode 100755
index 0000000..db924f7
--- /dev/null
+++ b/scripts/lib.sh
@@ -0,0 +1,35 @@
+# Fails the calling script when the given path contains modified or untracked
+# files, as reported by `git ls-files --other --modified --exclude-standard`.
+#
+# Arguments: $1 - path to inspect (defaults to the current directory)
+# Outputs:   the list of offending files on stdout; their diffs and the final
+#            summary line on stderr
+# Exits:     1 (terminating the shell that sourced this file) when any are found
+function check_unstaged() {
+  local changed file
+  local -a files
+  # Quote the path so a directory containing spaces or glob characters is
+  # passed to git as a single pathspec.
+  changed="$(git ls-files --other --modified --exclude-standard -- "${1:-.}")"
+  if [[ "$changed" != "" ]]; then
+    mapfile -t files <<<"$changed"
+
+    echo
+    echo "The following files contain unstaged changes:"
+    echo
+    for file in "${files[@]}"; do
+      echo " - $file"
+    done
+
+    echo
+    echo "These are the changes:"
+    echo
+    for file in "${files[@]}"; do
+      git --no-pager diff -- "$file" 1>&2
+    done
+
+    echo
+    echo >&2 "Unstaged changes, see above for details."
+    exit 1
+  fi
+}
\ No newline at end of file
diff --git a/scripts/lint-rules.sh b/scripts/lint-rules.sh
new file mode 100755
index 0000000..095330d
--- /dev/null
+++ b/scripts/lint-rules.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+# Renders the Prometheus alerts ConfigMap from the chart and lints every
+# rule file it contains with pint.
+set -euo pipefail
+
+temp_dir="$(mktemp -d)"
+rules_file="${temp_dir}/rules.yaml"
+helm template coder-o11y coder-observability -f coder-observability/values.yaml --show-only templates/configmap-prometheus-alerts.yaml > "${rules_file}"
+
+# Each key of .data is written to its own file and linted separately. The
+# unquoted $(...) relies on word splitting; presumably safe because ConfigMap
+# keys cannot contain whitespace or glob characters -- TODO confirm.
+for key in $(yq e '.data | keys' -o csv "${rules_file}" | tr ',' "\n"); do
+  file="${temp_dir}/${key}"
+  echo "=========================== [${file}] ==========================="
+
+  yq e ".data[\"${key}\"]" "${rules_file}" > "${file}"
+  go run github.com/cloudflare/pint/cmd/pint@latest -l DEBUG lint "${file}"
+done
\ No newline at end of file
diff --git a/scripts/publish.sh b/scripts/publish.sh
new file mode 100755
index 0000000..b51878a
--- /dev/null
+++ b/scripts/publish.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+# Packages the chart at the version reported by version.sh and uploads the
+# package, the merged repository index and artifacthub-repo.yaml to the bucket.
+set -euox pipefail
+
+version=$("$(dirname "${BASH_SOURCE[0]}")/version.sh")
+mkdir -p build/helm
+helm package coder-observability --version="${version}" --dependency-update --destination build/helm
+gsutil cp gs://helm.coder.com/observability/index.yaml build/helm/index.yaml
+helm repo index build/helm --url https://helm.coder.com/observability --merge build/helm/index.yaml
+gsutil -h "Cache-Control:no-cache,max-age=0" cp build/helm/index.yaml gs://helm.coder.com/observability/
+gsutil -h "Cache-Control:no-cache,max-age=0" cp "build/helm/coder-observability-${version}.tgz" gs://helm.coder.com/observability/
+gsutil -h "Cache-Control:no-cache,max-age=0" cp artifacthub-repo.yaml gs://helm.coder.com/observability/
+
+echo "$version"
\ No newline at end of file
diff --git a/scripts/version.sh b/scripts/version.sh
new file mode 100755
index 0000000..488df3a
--- /dev/null
+++ b/scripts/version.sh
@@ -0,0 +1,88 @@
+#!/usr/bin/env bash
+
+# This script generates the version string used by the helm chart, including for
+# dev versions. Note: the version returned by this script will NOT include the "v"
+# prefix that is included in the Git tag.
+# The script can also bump the version based on the given argument (major, minor, patch).
+
+set -euo pipefail
+
+# NOTE(review): remote_url is not referenced anywhere else in this script; under
+# "set -e" this line also aborts the script when there is no "origin" remote.
+remote_url=$(git remote get-url origin)
+# Highest tag according to version sort; empty when the repository has no tags.
+current_version="$(git tag -l | sort --version-sort | tail -n1)"
+
+# Prints usage information and exits successfully.
+function help() {
+  echo "$0 [options] [arguments]"
+  echo " "
+  echo "options:"
+  echo "-h, --help show brief help"
+  echo "-c, --current show the current version"
+  echo "-b, --bump bump the version based on the given argument"
+  exit 0
+}
+
+# Prints current_version with one component incremented.
+# Arguments: $1 - component to bump: major, minor or patch
+# Outputs:   the bumped version on stdout, keeping any non-numeric tag prefix
+#            (such as "v") exactly as it appears in current_version
+# Exits:     1 when $1 is not one of the three known components
+function bump_version() {
+  local version=$1
+  local new_version
+  # Split off whatever precedes the first digit (e.g. the "v" of "v1.2.3").
+  # awk coerces a non-numeric field such as "v1" to 0, so doing arithmetic on
+  # the raw tag made every major bump come out as "1.0.0".
+  local prefix="${current_version%%[0-9]*}"
+  local numeric="${current_version#"$prefix"}"
+
+  if [[ $version == "major" ]]; then
+    new_version="${prefix}$(printf '%s\n' "$numeric" | awk -F. '{print $1+1".0.0"}')"
+  elif [[ $version == "minor" ]]; then
+    new_version="${prefix}$(printf '%s\n' "$numeric" | awk -F. '{print $1"."$2+1".0"}')"
+  elif [[ $version == "patch" ]]; then
+    new_version="${prefix}$(printf '%s\n' "$numeric" | awk -F. '{print $1"."$2"."$3+1}')"
+  else
+    echo "Error: Unknown argument $version"
+    exit 1
+  fi
+
+  echo "$new_version"
+}
+
+# Prints current_version without its leading "v".
+function show_current() {
+  # Version without the "v" prefix.
+  echo "${current_version#v}"
+}
+
+if [[ $# -eq 0 ]]; then
+  show_current
+fi
+
+while test $# -gt 0; do
+  case "$1" in
+    -h|--help)
+      help
+      ;;
+    -c|--current)
+      show_current
+      shift
+      ;;
+    -b|--bump)
+      if [[ $# -lt 2 ]]; then
+        echo "Error: Missing argument for bump"
+        exit 1
+      fi
+      shift
+      bump_version "$1"
+      shift
+      ;;
+    *)
+      echo "Error: Unknown argument $1"
+      exit 1
+      ;;
+  esac
+done