Compare commits

...

32 Commits

Author SHA1 Message Date
packagrio-bot 1a05868381 (v0.4.5) Automated packaging of release by Packagr 2022-05-15 05:29:17 +00:00
Jason Kulatunga a35c3bae08 make 50-cron-config and entrypoint-collector.sh mirrors of each other. This is for ease of maintainability.
related #121
2022-05-14 22:04:46 -07:00
Jason Kulatunga 0c908786e0 Update TROUBLESHOOTING_INFLUXDB.md 2022-05-14 18:13:25 -07:00
Jason Kulatunga 2ba196d6a8 added instructions about upgrading from 0.3.x to 0.4.x 2022-05-13 16:30:07 -07:00
Jason Kulatunga 0e00999d79 added instructions about upgrading from 0.3.x to 0.4.x 2022-05-13 16:29:07 -07:00
Jason Kulatunga 5adceeb9a5 update with SMART vs Scrutiny details. 2022-05-12 21:58:36 -07:00
packagrio-bot b5920e35e3 (v0.4.4) Automated packaging of release by Packagr 2022-05-13 02:00:33 +00:00
Jason Kulatunga fb8f248366 add image for instructions. 2022-05-12 17:29:13 -07:00
Jason Kulatunga 7a931bd018 instructions for how to get your influxdb admin token + other troubleshooting tricks. 2022-05-12 16:00:04 -07:00
Jason Kulatunga a004f85145 fixing cron.
related #121
2022-05-12 15:24:51 -07:00
Jason Kulatunga eeb086c77f using export -p rather than printenv to export environment variables (export -p correctly wraps envs in quotes) 2022-05-12 15:06:29 -07:00
Jason Kulatunga 54b195f851 updating ignore file for testing. 2022-05-12 14:09:20 -07:00
Jason Kulatunga 5dc79134b2 cron file consistent logging (still broken) 2022-05-12 13:20:54 -07:00
Jason Kulatunga f3fad47d9e cleanup example config files. added instructions for influxdb. 2022-05-12 10:33:54 -07:00
Jason Kulatunga 489534cb73 update manual instructions to include InfluxDB installation. 2022-05-12 10:30:36 -07:00
Jason Kulatunga e7801619cd added additional tests from #187.
Detected that the frontend was incorrectly classifying Scrutiny Failures as Warnings.

Fixed.
2022-05-12 10:04:06 -07:00
Jason Kulatunga 7b75b5f9bb update docker instructions. 2022-05-12 09:26:00 -07:00
Jason Kulatunga 0022d848d6 added example hub/spoke docker-compose file. 2022-05-12 09:26:00 -07:00
Jason Kulatunga d47c4ea99a added example hub/spoke docker-compose file. 2022-05-12 09:26:00 -07:00
Jason Kulatunga 62354f2ab8 created example omnibus docker-compose file. 2022-05-12 09:26:00 -07:00
Jason Kulatunga 3a0adb406f Merge pull request #243 from Zorlin/master
Add Ansible instructions
2022-05-12 09:21:36 -07:00
Jason Kulatunga 2a39421524 Merge pull request #245 from henfri/patch-1 2022-05-11 11:34:52 -07:00
henfri a7dc68822f Update docker-compose.yml
to fix https://github.com/AnalogJ/scrutiny/issues/241
2022-05-11 20:09:40 +02:00
Benjamin Arntzen 3ad87aecc6 Add Ansible instructions 2022-05-11 10:06:09 +08:00
Jason Kulatunga 2ab714f575 Update README.md 2022-05-10 15:33:53 -07:00
Jason Kulatunga 9e60fb8d73 schedule the collector upload before the webapp upload (webapp seems to fail with OOM error during compilation) 2022-05-10 08:49:52 -07:00
Jason Kulatunga a846522830 disable sponsor label. broken. 2022-05-10 08:42:35 -07:00
Jason Kulatunga 3d7d276236 enable caching on all docker builds. 2022-05-09 21:55:48 -07:00
Jason Kulatunga 2660af7ce3 build arm32v7 images for web and collector (cannot for omnibus because InfluxDB does not support it yet. See #236 ) 2022-05-09 21:09:53 -07:00
Jason Kulatunga f5af86fd46 using docker build cache for speedier builds 2022-05-09 20:37:15 -07:00
Jason Kulatunga 4bad2d7b03 upgrading github actions for docker builds. (hopefully faster). 2022-05-09 20:21:52 -07:00
Jason Kulatunga a79930916e only run CI on PR. 2022-05-09 19:58:52 -07:00
27 changed files with 1591 additions and 71 deletions
+1 -1
@@ -1,6 +1,6 @@
name: CI
# This workflow is triggered on pushes & pull requests
on: [push, pull_request]
on: [pull_request]
jobs:
build:
+23 -18
@@ -22,14 +22,14 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v2
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
uses: docker/setup-buildx-action@v2
# Login against a Docker registry except on PR
# https://github.com/docker/login-action
- name: Log into registry ${{ env.REGISTRY }}
if: github.event_name != 'pull_request'
uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c
uses: docker/login-action@v2
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
@@ -38,7 +38,7 @@ jobs:
# https://github.com/docker/metadata-action
- name: Extract Docker metadata
id: meta
uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
uses: docker/metadata-action@v4
with:
tags: |
type=ref,enable=true,event=branch,suffix=-collector
@@ -48,14 +48,16 @@ jobs:
# Build and push Docker image with Buildx (don't push on PR)
# https://github.com/docker/build-push-action
- name: Build and push Docker image
uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
uses: docker/build-push-action@v3
with:
platforms: linux/amd64,linux/arm64
platforms: linux/amd64,linux/arm64,linux/arm/v7
context: .
file: docker/Dockerfile.collector
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max
web:
runs-on: ubuntu-latest
@@ -67,14 +69,14 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v2
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
uses: docker/setup-buildx-action@v2
# Login against a Docker registry except on PR
# https://github.com/docker/login-action
- name: Log into registry ${{ env.REGISTRY }}
if: github.event_name != 'pull_request'
uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c
uses: docker/login-action@v2
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
@@ -83,7 +85,7 @@ jobs:
# https://github.com/docker/metadata-action
- name: Extract Docker metadata
id: meta
uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
uses: docker/metadata-action@v4
with:
tags: |
type=ref,enable=true,event=branch,suffix=-web
@@ -93,15 +95,16 @@ jobs:
# Build and push Docker image with Buildx (don't push on PR)
# https://github.com/docker/build-push-action
- name: Build and push Docker image
uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
uses: docker/build-push-action@v3
with:
platforms: linux/amd64,linux/arm64
platforms: linux/amd64,linux/arm64,linux/arm/v7
context: .
file: docker/Dockerfile.web
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max
omnibus:
runs-on: ubuntu-latest
permissions:
@@ -112,14 +115,14 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v2
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
uses: docker/setup-buildx-action@v2
# Login against a Docker registry except on PR
# https://github.com/docker/login-action
- name: Log into registry ${{ env.REGISTRY }}
if: github.event_name != 'pull_request'
uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c
uses: docker/login-action@v2
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
@@ -128,7 +131,7 @@ jobs:
# https://github.com/docker/metadata-action
- name: Extract Docker metadata
id: meta
uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
uses: docker/metadata-action@v4
with:
tags: |
type=ref,enable=true,event=branch,suffix=-omnibus
@@ -138,7 +141,7 @@ jobs:
# Build and push Docker image with Buildx (don't push on PR)
# https://github.com/docker/build-push-action
- name: Build and push Docker image
uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
uses: docker/build-push-action@v3
with:
platforms: linux/amd64,linux/arm64
context: .
@@ -146,3 +149,5 @@ jobs:
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max
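Taken together, the changes to this workflow file swap pinned commit SHAs for action version tags, add `linux/arm/v7` to the collector and web build platforms, and enable the GitHub Actions cache backend for Docker layers. As a rough sketch, the two new cache lines correspond to the following standalone Buildx invocation (the flags are standard Buildx options, but the `gha` cache backend only authenticates inside an Actions runner):
```bash
# Sketch: what cache-from/cache-to do, expressed as a raw buildx call.
# The gha cache backend relies on the ACTIONS_* credentials that the
# GitHub Actions runner injects, so this only succeeds inside a job.
docker buildx build \
  --platform linux/amd64,linux/arm64,linux/arm/v7 \
  --file docker/Dockerfile.collector \
  --cache-from type=gha \
  --cache-to type=gha,mode=max \
  .
```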
+10 -10
@@ -59,6 +59,16 @@ jobs:
file "$GITHUB_WORKSPACE/dist/scrutiny-collector-metrics-${GOOS}-${GOARCH}" || true
ldd "$GITHUB_WORKSPACE/dist/scrutiny-collector-metrics-${GOOS}-${GOARCH}" || true
- name: Release Asset - Collector - freebsd-amd64
id: upload-release-asset2
uses: actions/upload-release-asset@v1
env:
GITHUB_TOKEN: ${{ secrets.SCRUTINY_GITHUB_TOKEN }}
with:
upload_url: ${{ github.event.release.upload_url }} # This pulls from the CREATE RELEASE step above, referencing its ID to get its outputs object, which includes an `upload_url`. See this blog post for more info: https://jasonet.co/posts/new-features-of-github-actions/#passing-data-to-future-steps
asset_path: './dist/scrutiny-collector-metrics-freebsd-amd64'
asset_name: scrutiny-collector-metrics-freebsd-amd64
asset_content_type: application/octet-stream
- name: Release Asset - Web - freebsd-amd64
id: upload-release-asset1
@@ -71,13 +81,3 @@ jobs:
asset_name: scrutiny-web-freebsd-amd64
asset_content_type: application/octet-stream
- name: Release Asset - Collector - freebsd-amd64
id: upload-release-asset2
uses: actions/upload-release-asset@v1
env:
GITHUB_TOKEN: ${{ secrets.SCRUTINY_GITHUB_TOKEN }}
with:
upload_url: ${{ github.event.release.upload_url }} # This pulls from the CREATE RELEASE step above, referencing its ID to get its outputs object, which includes an `upload_url`. See this blog post for more info: https://jasonet.co/posts/new-features-of-github-actions/#passing-data-to-future-steps
asset_path: './dist/scrutiny-collector-metrics-freebsd-amd64'
asset_name: scrutiny-collector-metrics-freebsd-amd64
asset_content_type: application/octet-stream
+1
@@ -8,6 +8,7 @@ jobs:
build:
name: is-sponsor-label
runs-on: ubuntu-latest
if: ${{ false }}
steps:
- uses: JasonEtco/is-sponsor-label-action@v1.2.0
env:
+2
@@ -64,3 +64,5 @@ scrutiny-*.db
scrutiny_test.db
scrutiny.yaml
coverage.txt
/config
/influxdb
+6 -2
@@ -70,8 +70,10 @@ Scrutiny uses `smartctl --scan` to detect devices/drives.
If you're using Docker, getting started is as simple as running the following command:
> See [docker/example.omnibus.docker-compose.yml](./docker/example.omnibus.docker-compose.yml) for a docker-compose file.
```bash
docker run -it --rm -p 8080:8080 \
docker run -it --rm -p 8080:8080 -p 8086:8086 \
-v `pwd`/scrutiny:/opt/scrutiny/config \
-v `pwd`/influxdb2:/opt/scrutiny/influxdb \
-v /run/udev:/run/udev:ro \
@@ -95,6 +97,8 @@ In addition to the Omnibus image (available under the `latest` tag) there are 2
- `ghcr.io/analogj/scrutiny:master-collector` - Contains the Scrutiny data collector, `smartctl` binary and cron-like scheduler. You can run one collector on each server.
- `ghcr.io/analogj/scrutiny:master-web` - Contains the Web UI, API and Database. Only one container is necessary.
> See [docker/example.hubspoke.docker-compose.yml](./docker/example.hubspoke.docker-compose.yml) for a docker-compose file.
```bash
docker run --rm -p 8086:8086 \
-v `pwd`/influxdb2:/var/lib/influxdb2 \
@@ -233,7 +237,7 @@ scrutiny-collector-metrics run --debug --log-file /tmp/collector.log
| amd64 | :white_check_mark: | :white_check_mark: |
| arm-5 | :white_check_mark: | |
| arm-6 | :white_check_mark: | |
| arm-7 | :white_check_mark: | |
| arm-7 | :white_check_mark: | web/collector only. see [#236](https://github.com/AnalogJ/scrutiny/issues/236) |
| arm64 | :white_check_mark: | :white_check_mark: |
| freebsd | :white_check_mark: | |
| macos-amd64 | | :white_check_mark: |
+9 -4
@@ -1,14 +1,19 @@
#!/bin/bash
# Cron runs in its own isolated environment (usually using only /etc/environment )
# So when the container starts up, we will do a dump of the runtime environment into a .env file that we
# will then source into the crontab file (/etc/cron.d/scrutiny.sh)
printenv | sed 's/^\(.*\)$/export \1/g' > /env.sh
# will then source into the crontab file (/etc/cron.d/scrutiny)
(set -o posix; export -p) > /env.sh
# adding ability to customize the cron schedule.
COLLECTOR_CRON_SCHEDULE=${COLLECTOR_CRON_SCHEDULE:-"0 0 * * *"}
# if the cron schedule has been overridden via env variable (eg docker-compose) we should make sure to strip quotes
[[ "${COLLECTOR_CRON_SCHEDULE}" == \"*\" || "${COLLECTOR_CRON_SCHEDULE}" == \'*\' ]] && COLLECTOR_CRON_SCHEDULE="${COLLECTOR_CRON_SCHEDULE:1:-1}"
# replace placeholder with correct value
sed -i 's|{COLLECTOR_CRON_SCHEDULE}|'"${COLLECTOR_CRON_SCHEDULE}"'|g' /etc/cron.d/scrutiny
# now that we have the env start cron in the foreground
echo "starting cron"
su -c "cron -l 8 -f" root
su -c "cron -f -L 15" root
@@ -0,0 +1,48 @@
version: '2.4'
services:
influxdb:
image: influxdb:2.2
ports:
- '8086:8086'
volumes:
- './influxdb:/var/lib/influxdb2'
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8086/health"]
interval: 5s
timeout: 10s
retries: 20
web:
image: 'ghcr.io/analogj/scrutiny:master-web'
ports:
- '8080:8080'
volumes:
- './config:/opt/scrutiny/config'
environment:
SCRUTINY_WEB_INFLUXDB_HOST: 'influxdb'
depends_on:
influxdb:
condition: service_healthy
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/api/health"]
interval: 5s
timeout: 10s
retries: 20
start_period: 10s
collector:
image: 'ghcr.io/analogj/scrutiny:master-collector'
cap_add:
- SYS_RAWIO
volumes:
- '/run/udev:/run/udev:ro'
environment:
COLLECTOR_API_ENDPOINT: 'http://web:8080'
depends_on:
web:
condition: service_healthy
devices:
- "/dev/sda"
- "/dev/sdb"
@@ -7,11 +7,12 @@ services:
cap_add:
- SYS_RAWIO
ports:
- "8080:8080"
- "8080:8080" # webapp
- "8086:8086" # influxDB admin
volumes:
- /run/udev:/run/udev:ro
- ./config:/opt/scrutiny/config
- ./influxdb:/var/lib/influxdb2
- ./influxdb:/opt/scrutiny/influxdb
devices:
- "/dev/sda"
- "/dev/sdb"
+17
@@ -0,0 +1,17 @@
# Ansible Install
[Zorlin](https://github.com/Zorlin) has developed and now maintains [an Ansible playbook](https://github.com/Zorlin/scrutiny-playbook) which automates the steps involved in manually setting up Scrutiny.
Using it is simple:
* Grab a copy of the playbook
* Follow the directions in the playbook repository
* Run `ansible-playbook site.yml`
* Visit http://your-machine:8080 to see your new Scrutiny installation.
It will automatically pull metrics from machines once a day, at 1am.
You can see it in action below.
[![asciicast](https://asciinema.org/a/493531.svg)](https://asciinema.org/a/493531)
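For reference, a minimal sketch of those steps; the repository URL comes from the link above, while any inventory or variable configuration is an assumption, so follow the playbook's own README:
```bash
git clone https://github.com/Zorlin/scrutiny-playbook.git
cd scrutiny-playbook
# configure inventory/variables per the playbook's documentation (assumption)
ansible-playbook site.yml
```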
+1
@@ -0,0 +1 @@
> See [docker/example.hubspoke.docker-compose.yml](./docker/example.hubspoke.docker-compose.yml) for a docker-compose file.
+19 -2
@@ -2,12 +2,18 @@
While the easiest way to get started with [Scrutiny is using Docker](https://github.com/AnalogJ/scrutiny#docker),
it is possible to run it manually without much work. You can even mix and match, using Docker for one component and
a manual installation for the other.
a manual installation for the other. There's also [an installer](INSTALL_ANSIBLE.md) which automates this manual installation procedure.
Scrutiny is made up of two components: a collector and a webapp/api. Here's how each component can be deployed manually.
Scrutiny is made up of three components: an InfluxDB database, a collector, and a webapp/API. Here's how each component can be deployed manually.
> Note: the `/opt/scrutiny` directory is not hardcoded, you can use any directory name/path.
## InfluxDB
Please follow the official InfluxDB installation guide; note that you'll need to install v2.2.0 or newer.
https://docs.influxdata.com/influxdb/v2.2/install/
## Webapp/API
### Dependencies
@@ -45,6 +51,17 @@ web:
# The path to the Scrutiny frontend files (js, css, images) must be specified.
# We'll populate it with files in the next section
path: /opt/scrutiny/web
# if you're running influxdb on a different host (or using a cloud-provider) you'll need to update the host & port below.
# token, org, bucket are unnecessary for a new InfluxDB installation, as Scrutiny will automatically run the InfluxDB setup,
# and store the information in the config file. If you're re-using an existing influxdb installation, you'll need to provide
# the `token`
influxdb:
host: 0.0.0.0
port: 8086
# token: 'my-token'
# org: 'my-org'
# bucket: 'bucket'
```
> Note: for a full list of available configuration options, please check the [example.scrutiny.yaml](https://github.com/AnalogJ/scrutiny/blob/master/example.scrutiny.yaml) file.
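Before starting the webapp, it's worth confirming that InfluxDB is reachable at the configured host and port. The `/health` endpoint is part of the standard InfluxDB 2.x API:
```bash
# substitute the host/port from the influxdb block above
curl -f http://localhost:8086/health
# a healthy instance responds with JSON containing "status": "pass"
```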
+13
@@ -118,6 +118,19 @@ instead of the block device (`/dev/nvme0n1`). See [#209](https://github.com/AnalogJ/scrutiny/issues/209)
### Volume Mount All Devices (`/dev`) - Privileged
## Scrutiny detects Failure but SMART Passed?
There are two different mechanisms that Scrutiny uses to detect failures.
The first is simple SMART failures. If SMART thinks an attribute is in a failed state, Scrutiny will display it as failed as well.
The second uses Backblaze failure data: [https://backblaze.com/blog-smart-stats-2014-8.html](https://backblaze.com/blog-smart-stats-2014-8.html)
If Scrutiny detects that an attribute corresponds with a high rate of failure in Backblaze's data, it will also mark that attribute (and disk) as failed (even though SMART may think the device is still healthy).
This can cause some confusion when comparing Scrutiny's dashboard against other SMART analysis tools.
If you hover over the "failed" label beside an attribute, Scrutiny will tell you whether the failure was due to SMART or Scrutiny/Backblaze data.
## Hub & Spoke model, with multiple Hosts.
When deploying Scrutiny in a hub & spoke model, it can be difficult to determine exactly which node a set of devices is associated with.
+68
@@ -0,0 +1,68 @@
# InfluxDB Troubleshooting
## Installation
InfluxDB is a required dependency for Scrutiny v0.4.0+.
https://docs.influxdata.com/influxdb/v2.2/install/
## Persistence
To ensure that all data is correctly stored, you must also persist the InfluxDB database directory:
- If you're using the Official Scrutiny Omnibus image (`ghcr.io/analogj/scrutiny:master-omnibus`), the path is `/opt/scrutiny/influxdb`
- If you're deploying in Hub/Spoke mode with the InfluxDB maintained image (`influxdb:2.2`), the path is `/var/lib/influxdb2`
If you attempt to restart Scrutiny but forgot to persist the InfluxDB directory, you will get an error message like the following:
```
scrutiny | time="2022-05-12T22:54:12Z" level=info msg="Trying to connect to scrutiny sqlite db: /opt/scrutiny/config/scrutiny.db\n"
scrutiny | time="2022-05-12T22:54:12Z" level=info msg="Successfully connected to scrutiny sqlite db: /opt/scrutiny/config/scrutiny.db\n"
scrutiny | ts=2022-05-12T22:54:12.240791Z lvl=info msg=Unauthorized log_id=0aQcVlOW000 error="authorization not found"
scrutiny | panic: unauthorized: unauthorized access
```
Unfortunately this may mean that your database is lost, and the previous Scrutiny data is unavailable.
You should fix the docker-compose/docker run command that you're using to ensure that your database folder is persisted correctly,
then delete the `web.influxdb.token` field in your `scrutiny.yaml` file, and then restart Scrutiny.
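For the omnibus image, a run command that persists both directories mirrors the README quickstart; the host paths on the left of each `-v` are yours to choose, and the `--device` entries are placeholders for your drives:
```bash
docker run -it --rm -p 8080:8080 -p 8086:8086 \
  -v "$(pwd)/scrutiny:/opt/scrutiny/config" \
  -v "$(pwd)/influxdb2:/opt/scrutiny/influxdb" \
  -v /run/udev:/run/udev:ro \
  --cap-add SYS_RAWIO \
  --device /dev/sda \
  ghcr.io/analogj/scrutiny:master-omnibus
```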
## First Start
The web/api service will trigger an InfluxDB onboarding process automatically when it first starts. After that, it will store the newly generated InfluxDB API token in the Scrutiny config file.
If this credential is not correctly stored in the Scrutiny config file, Scrutiny will fail to start with an authentication error:
```
scrutiny | time="2022-05-12T22:52:55Z" level=info msg="Successfully connected to scrutiny sqlite db: /opt/scrutiny/config/scrutiny.db\n"
scrutiny | ts=2022-05-12T22:52:55.235753Z lvl=error msg="failed to onboard user admin" log_id=0aQcRnc0000 handler=onboard error="onboarding has already been completed" took=0.038ms
scrutiny | ts=2022-05-12T22:52:55.235816Z lvl=error msg="api error encountered" log_id=0aQcRnc0000 error="onboarding has already been completed"
scrutiny | panic: conflict: onboarding has already been completed
```
You can fix this issue by authenticating to the InfluxDB admin portal (the default credentials are username: `admin`, password: `password12345`),
then retrieving the API token, and writing it to your `scrutiny.yaml` config file under the `web.influxdb.token` field:
![influx db admin token](./influxdb-admin-token.png)
## Upgrading from v0.3.x to v0.4.x
When upgrading from v0.3.x to v0.4.x, some users have noticed problems such as:
```
2022/05/13 14:38:05 Loading configuration file: /opt/scrutiny/config/scrutiny.yaml
time="2022-05-13T14:38:05Z" level=info msg="Trying to connect to scrutiny sqlite db:"
time="2022-05-13T14:38:05Z" level=info msg="Successfully connected to scrutiny sqlite db:"
panic: a username and password is required for a setup
```
As discussed in [#248](https://github.com/AnalogJ/scrutiny/issues/248) and [#234](https://github.com/AnalogJ/scrutiny/issues/234),
this is usually related to one of the following:
- Upgrading from the LSIO Scrutiny image to the Official Scrutiny image, without removing LSIO-specific environment variables
  - Remove the `SCRUTINY_WEB=true` and `SCRUTINY_COLLECTOR=true` environment variables. They were used by the LSIO image, but are unnecessary and cause issues with the official Scrutiny image.
- Updated versions of the [LSIO Scrutiny images are broken](https://github.com/linuxserver/docker-scrutiny/issues/22), as they do not install InfluxDB, which is a required dependency of Scrutiny v0.4.x
  - You can revert to an earlier version of the LSIO image (`lscr.io/linuxserver/scrutiny:060ac7b8-ls34`), or just change to the official Scrutiny image (`ghcr.io/analogj/scrutiny:master-omnibus`)
Here are a couple of confirmed-working docker-compose files that you may want to look at:
- https://github.com/AnalogJ/scrutiny/blob/master/docker/example.hubspoke.docker-compose.yml
- https://github.com/AnalogJ/scrutiny/blob/master/docker/example.omnibus.docker-compose.yml
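If you're migrating an existing compose file away from the LSIO image, a quick way to spot the leftover variables mentioned above (the file name is an assumption):
```bash
grep -nE 'SCRUTINY_(WEB|COLLECTOR)' docker-compose.yml
# any matches should be removed before switching to ghcr.io/analogj/scrutiny:master-omnibus
```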
Binary file not shown (new image, 35 KiB).
+10
@@ -72,3 +72,13 @@ devices:
#
########################################################################################################################
#collect:
# metric:
# enable: true
# command: '-a -o on -S on'
# long:
# enable: false
# command: ''
# short:
# enable: false
# command: ''
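The commented-out `command` values are extra flags handed to `smartctl`. For reference, here is what the default metric flags mean when run by hand (flag meanings per the smartctl man page; the collector's exact invocation may differ):
```bash
smartctl -a -o on -S on /dev/sda
#        |  |      '-- -S on: enable SMART attribute autosave
#        |  '-- -o on: enable automatic offline data collection
#        '-- -a: print all SMART information for the device
```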
+5 -17
@@ -34,6 +34,11 @@ web:
# the location on the filesystem where scrutiny javascript + css is located
frontend:
path: /opt/scrutiny/web
# if you're running influxdb on a different host (or using a cloud-provider) you'll need to update the host & port below.
# token, org, bucket are unnecessary for a new InfluxDB installation, as Scrutiny will automatically run the InfluxDB setup,
# and store the information in the config file. If you're re-using an existing influxdb installation, you'll need to provide
# the `token`
influxdb:
host: 0.0.0.0
port: 8086
@@ -75,12 +80,6 @@ log:
#
########################################################################################################################
#disks:
# include:
# # - /dev/sda
# exclude:
# # - /dev/sdb
#limits:
# ata:
# critical:
@@ -95,14 +94,3 @@ log:
# critical: true
# standard: true
#collect:
# metric:
# enable: true
# command: '-a -o on -S on'
# long:
# enable: false
# command: ''
# short:
# enable: false
# command: ''
-4
@@ -1,4 +0,0 @@
#!/usr/bin/with-contenv bash
COLLECTOR_CRON_SCHEDULE=${COLLECTOR_CRON_SCHEDULE:-"0 0 * * *"}
sed -i 's|{COLLECTOR_CRON_SCHEDULE}|'"${COLLECTOR_CRON_SCHEDULE}"'|g' /etc/cron.d/scrutiny
+15
@@ -0,0 +1,15 @@
#!/usr/bin/with-contenv bash
# Cron runs in its own isolated environment (usually using only /etc/environment )
# So when the container starts up, we will do a dump of the runtime environment into a .env file that we
# will then source into the crontab file (/etc/cron.d/scrutiny)
(set -o posix; export -p) > /env.sh
# adding ability to customize the cron schedule.
COLLECTOR_CRON_SCHEDULE=${COLLECTOR_CRON_SCHEDULE:-"0 0 * * *"}
# if the cron schedule has been overridden via env variable (eg docker-compose) we should make sure to strip quotes
[[ "${COLLECTOR_CRON_SCHEDULE}" == \"*\" || "${COLLECTOR_CRON_SCHEDULE}" == \'*\' ]] && COLLECTOR_CRON_SCHEDULE="${COLLECTOR_CRON_SCHEDULE:1:-1}"
# replace placeholder with correct value
sed -i 's|{COLLECTOR_CRON_SCHEDULE}|'"${COLLECTOR_CRON_SCHEDULE}"'|g' /etc/cron.d/scrutiny
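The bracketed pattern test strips one layer of surrounding quotes, which docker-compose sometimes passes through literally. A quick demonstration with a hypothetical schedule:
```bash
COLLECTOR_CRON_SCHEDULE='"*/15 * * * *"'   # value arrives wrapped in literal double quotes
[[ "${COLLECTOR_CRON_SCHEDULE}" == \"*\" || "${COLLECTOR_CRON_SCHEDULE}" == \'*\' ]] \
  && COLLECTOR_CRON_SCHEDULE="${COLLECTOR_CRON_SCHEDULE:1:-1}"
echo "${COLLECTOR_CRON_SCHEDULE}"          # -> */15 * * * *
```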
+1 -1
@@ -9,5 +9,5 @@ s6-svc -O /var/run/s6/services/collector-once
# wait until scrutiny is "Ready"
until $(curl --output /dev/null --silent --head --fail http://localhost:8080/api/health); do echo "scrutiny api not ready" && sleep 5; done
echo "starting scrutiny collector"
echo "starting scrutiny collector (run-once mode. subsequent calls will be triggered via cron service)"
/opt/scrutiny/bin/scrutiny-collector-metrics run
Regular → Executable
+1 -7
@@ -1,10 +1,4 @@
#!/usr/bin/with-contenv bash
# Cron runs in its own isolated environment (usually using only /etc/environment )
# So when the container starts up, we will do a dump of the runtime environment into a .env file that we
# will then source into the crontab file (/etc/cron.d/scrutiny.sh)
printenv | sed 's/^\(.*\)$/export \1/g' > /env.sh
echo "starting cron"
cron -f
cron -f -L 15
@@ -93,6 +93,8 @@ func (sa *SmartAtaAttribute) PopulateAttributeStatus() *SmartAtaAttribute {
//this attribute has previously failed
sa.Status = pkg.SmartAttributeStatusFailed
sa.StatusReason = "Attribute is failing manufacturer SMART threshold"
//if the Smart Status is failed, we should exit early, no need to look at thresholds.
return sa
} else if strings.ToUpper(sa.WhenFailed) == pkg.SmartWhenFailedInThePast {
sa.Status = pkg.SmartAttributeStatusWarning
@@ -381,6 +381,70 @@ func TestFromCollectorSmartInfo_Fail_ScrutinySmart(t *testing.T) {
require.Equal(t, 17, len(smartMdl.Attributes))
}
func TestFromCollectorSmartInfo_Fail_ScrutinyNonCriticalFailed(t *testing.T) {
//setup
smartDataFile, err := os.Open("../testdata/smart-ata-failed-scrutiny.json")
require.NoError(t, err)
defer smartDataFile.Close()
var smartJson collector.SmartInfo
smartDataBytes, err := ioutil.ReadAll(smartDataFile)
require.NoError(t, err)
err = json.Unmarshal(smartDataBytes, &smartJson)
require.NoError(t, err)
//test
smartMdl := measurements.Smart{}
err = smartMdl.FromCollectorSmartInfo("WWN-test", smartJson)
//assert
require.NoError(t, err)
require.Equal(t, "WWN-test", smartMdl.DeviceWWN)
require.Equal(t, pkg.DeviceStatusFailedScrutiny, smartMdl.Status)
require.Equal(t, int64(pkg.SmartAttributeStatusFailed), smartMdl.Attributes["199"].GetStatus(),
"scrutiny should detect that %d failed (status: %d, %s)",
smartMdl.Attributes["199"].(*measurements.SmartAtaAttribute).AttributeId,
smartMdl.Attributes["199"].GetStatus(), smartMdl.Attributes["199"].(*measurements.SmartAtaAttribute).StatusReason,
)
require.Equal(t, 14, len(smartMdl.Attributes))
}
//TODO: Scrutiny Warn
//TODO: Smart + Scrutiny Warn
func TestFromCollectorSmartInfo_NVMe_Fail_Scrutiny(t *testing.T) {
//setup
smartDataFile, err := os.Open("../testdata/smart-nvme-failed.json")
require.NoError(t, err)
defer smartDataFile.Close()
var smartJson collector.SmartInfo
smartDataBytes, err := ioutil.ReadAll(smartDataFile)
require.NoError(t, err)
err = json.Unmarshal(smartDataBytes, &smartJson)
require.NoError(t, err)
//test
smartMdl := measurements.Smart{}
err = smartMdl.FromCollectorSmartInfo("WWN-test", smartJson)
//assert
require.NoError(t, err)
require.Equal(t, "WWN-test", smartMdl.DeviceWWN)
require.Equal(t, pkg.DeviceStatusFailedScrutiny, smartMdl.Status)
require.Equal(t, int64(pkg.SmartAttributeStatusFailed), smartMdl.Attributes["media_errors"].GetStatus(),
"scrutiny should detect that %s failed (status: %d, %s)",
smartMdl.Attributes["media_errors"].(*measurements.SmartNvmeAttribute).AttributeId,
smartMdl.Attributes["media_errors"].GetStatus(),
smartMdl.Attributes["media_errors"].(*measurements.SmartNvmeAttribute).StatusReason,
)
require.Equal(t, 16, len(smartMdl.Attributes))
}
func TestFromCollectorSmartInfo_Nvme(t *testing.T) {
//setup
smartDataFile, err := os.Open("../testdata/smart-nvme.json")
File diff suppressed because it is too large.
@@ -0,0 +1,107 @@
{
"json_format_version": [
1,
0
],
"smartctl": {
"version": [
7,
0
],
"svn_revision": "4883",
"platform_info": "x86_64-linux-5.13.0-40-generic",
"build_info": "(local build)",
"argv": [
"smartctl",
"-x",
"-j",
"/dev/nvme0"
],
"exit_status": 0
},
"device": {
"name": "/dev/nvme0",
"info_name": "/dev/nvme0",
"type": "nvme",
"protocol": "NVMe"
},
"model_name": "Samsung SSD 970 EVO 500GB",
"serial_number": "S466NX0M776250H",
"firmware_version": "2B2QEXE7",
"nvme_pci_vendor": {
"id": 5197,
"subsystem_id": 5197
},
"nvme_ieee_oui_identifier": 9528,
"nvme_total_capacity": 500107862016,
"nvme_unallocated_capacity": 0,
"nvme_controller_id": 4,
"nvme_number_of_namespaces": 1,
"nvme_namespaces": [
{
"id": 1,
"size": {
"blocks": 976773168,
"bytes": 500107862016
},
"capacity": {
"blocks": 976773168,
"bytes": 500107862016
},
"utilization": {
"blocks": 327275384,
"bytes": 167564996608
},
"formatted_lba_size": 512,
"eui64": {
"oui": 9528,
"ext_id": 376106710327
}
}
],
"user_capacity": {
"blocks": 976773168,
"bytes": 500107862016
},
"logical_block_size": 512,
"local_time": {
"time_t": 1652220188,
"asctime": "Tue May 10 22:03:08 2022 UTC"
},
"smart_status": {
"passed": true,
"nvme": {
"value": 0
}
},
"nvme_smart_health_information_log": {
"critical_warning": 0,
"temperature": 35,
"available_spare": 99,
"available_spare_threshold": 10,
"percentage_used": 3,
"data_units_read": 17176794,
"data_units_written": 65602088,
"host_reads": 118020838,
"host_writes": 874050000,
"controller_busy_time": 7601,
"power_cycles": 25,
"power_on_hours": 12798,
"unsafe_shutdowns": 10,
"media_errors": 7,
"num_err_log_entries": 62,
"warning_temp_time": 0,
"critical_comp_time": 0,
"temperature_sensors": [
35,
39
]
},
"temperature": {
"current": 35
},
"power_cycle_count": 25,
"power_on_time": {
"hours": 12798
}
}
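This fixture is raw `smartctl` JSON (note the `argv` field near the top), so a fresh one can be captured from any device. A sketch, assuming smartmontools 7.x and an illustrative output file name:
```bash
# -x prints all device information, -j emits JSON (same flags as the argv field above)
smartctl -x -j /dev/nvme0 > smart-nvme-failed.json
```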
+1 -1
@@ -2,4 +2,4 @@ package version
// VERSION is the app-global version string, which will be replaced with a
// new value during packaging
const VERSION = "0.4.3"
const VERSION = "0.4.5"
@@ -111,9 +111,9 @@ export class DetailComponent implements OnInit, AfterViewInit, OnDestroy {
if(attribute_status == 0){
return "passed"
} else if (attribute_status == 1){
return "warn"
} else if (attribute_status == 2){
return "failed"
} else if (attribute_status == 2){
return "warn"
}
return
}