From 404e89508dbbe5c4ecbb1620cf3e830bbc12e256 Mon Sep 17 00:00:00 2001 From: m3v113 Date: Mon, 5 Jan 2026 19:53:59 -0800 Subject: [PATCH 1/5] added parsing html for printer info --- printer/modules/collector.py | 34 ++++++++++++++++++++++++++++++++++ printer/requirements.txt | 1 + 2 files changed, 35 insertions(+) diff --git a/printer/modules/collector.py b/printer/modules/collector.py index f7c336fe..33afeb16 100644 --- a/printer/modules/collector.py +++ b/printer/modules/collector.py @@ -5,6 +5,11 @@ from pysnmp.hlapi import * +# imports for parsing html +import requests +import bs4 +from bs4 import BeautifulSoup + from modules.metrics import MetricsHandler metrics_handler = MetricsHandler.instance() @@ -62,11 +67,40 @@ def fetch_ips_from_config(config_file_path): except Exception: logging.exception(f"error opening config file") +def scrape_html(ip): + + url = "http://" + ip + "/" + + try: + page = requests.get(url, timeout=5) + page.raise_for_status() + except Exception: + logging.exception("failed to fetch printer html page") + return + + soup = BeautifulSoup(page.content, 'html.parser') + + content = soup.find_all('td') + text = "%" + ink_level = "" + for element in content: + if text in str(element.string): + ink_level = float((element.text.strip()).rstrip('%')) + metrics_handler.snmp_metric.labels(name="ink_level", ip=ip).set(ink_level) + content = soup.find_all('td', class_='tableDataCellStand width30') + pages_remaining = 0 + for element in content: + try: + pages_remaining = int(element.text.strip()) + metrics_handler.snmp_metric.labels(name="pages_remaining", ip=ip).set(pages_remaining) + except Exception: + pass def scrape_snmp(ip_list, sleep_duration_minutes=5): while True: for ip in ip_list: get_snmp_data(ip) + scrape_html(ip) time.sleep(sleep_duration_minutes * 60) diff --git a/printer/requirements.txt b/printer/requirements.txt index 56bd4bf9..08c3b73b 100644 --- a/printer/requirements.txt +++ b/printer/requirements.txt @@ -6,3 +6,4 @@ httpx==0.28.1 
requests==2.32.3 pysnmp==4.4.12 pyasn1==0.4.8 +bs4==0.0.2 \ No newline at end of file From 44fe8c96f8a7c9a97ccca393c37d41a3dca29f50 Mon Sep 17 00:00:00 2001 From: m3v113 Date: Mon, 5 Jan 2026 21:11:55 -0800 Subject: [PATCH 2/5] metric line replaced/removed --- printer/modules/collector.py | 1 - 1 file changed, 1 deletion(-) diff --git a/printer/modules/collector.py b/printer/modules/collector.py index 33afeb16..4800a2fe 100644 --- a/printer/modules/collector.py +++ b/printer/modules/collector.py @@ -24,7 +24,6 @@ class SnmpOid(enum.Enum): - INK_LEVEL = ("ink_level", "1.3.6.1.2.1.43.11.1.1.9.1.1") INK_CAPACITY = ("ink_capacity", "1.3.6.1.2.1.43.11.1.1.8.1.1") PAGE_COUNT = ("page_count", "1.3.6.1.2.1.43.10.2.1.4.1.1") TRAY_EMPTY = ("tray_empty", "1.3.6.1.2.1.43.18.1.1.8.1.13", True) From 363287d6ff5d41a39095f0b0f364fe1715249dcc Mon Sep 17 00:00:00 2001 From: m3v113 <76668066+m3v113@users.noreply.github.com> Date: Mon, 5 Jan 2026 21:02:56 -0800 Subject: [PATCH 3/5] changed exception Co-authored-by: Evan Ugarte <36345325+evanugarte@users.noreply.github.com> --- printer/modules/collector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/printer/modules/collector.py b/printer/modules/collector.py index 4800a2fe..54143675 100644 --- a/printer/modules/collector.py +++ b/printer/modules/collector.py @@ -92,8 +92,8 @@ def scrape_html(ip): try: pages_remaining = int(element.text.strip()) metrics_handler.snmp_metric.labels(name="pages_remaining", ip=ip).set(pages_remaining) - except Exception: - pass + except ValueError: + continue def scrape_snmp(ip_list, sleep_duration_minutes=5): while True: From bc2ba0ecd86fd90b6b597a12d777c3798af54c3e Mon Sep 17 00:00:00 2001 From: evan Date: Sat, 10 Jan 2026 22:02:46 -0800 Subject: [PATCH 4/5] cleaning up --- printer/modules/collector.py | 5 +---- printer/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/printer/modules/collector.py b/printer/modules/collector.py index 
def scrape_html(ip: str) -> None:
    """Scrape the printer's status web page and publish what it reports.

    Fetches ``http://<ip>/`` and updates ``metrics_handler.snmp_metric``:

    * ``ink_level`` -- parsed from the first ``<td>`` whose string contains
      a ``%`` sign, after stripping whitespace and the trailing ``%``.
    * ``pages_remaining`` -- parsed from each
      ``td.tableDataCellStand.width30`` cell whose text is an integer.

    A failed HTTP request is logged and the function returns without
    touching any metric. Cells that cannot be parsed as numbers are logged
    (ink level) or skipped (page counts). Errors raised while updating the
    metric itself are not caught here and propagate to the caller.

    Args:
        ip: Host name or IP address of the printer to scrape.
    """
    url = f"http://{ip}/"

    try:
        response = requests.get(url, timeout=5)
        response.raise_for_status()
    except requests.RequestException:
        # Include the address: this runs once per printer in a polling loop,
        # so the log line must say which device was unreachable.
        logging.exception("failed to fetch printer html page from %s", ip)
        return

    soup = BeautifulSoup(response.content, 'html.parser')

    ink_td = soup.find('td', string=lambda s: s and '%' in s)
    if ink_td is not None:
        raw_level = ink_td.text.strip()
        # Only the conversion lives in the try block, so a ValueError raised
        # by the metrics client is not mistaken for a parse failure.
        try:
            level = float(raw_level.rstrip('%'))
        except ValueError:
            logging.warning(
                "Could not parse ink level from %r on %s", raw_level, ip
            )
        else:
            metrics_handler.snmp_metric.labels(name="ink_level", ip=ip).set(level)

    # NOTE(review): every numeric cell writes the same gauge, so the last
    # matching cell wins -- confirm that is the intended value.
    for cell in soup.select('td.tableDataCellStand.width30'):
        try:
            count = int(cell.text.strip())
        except ValueError:
            # Cells with this class that do not hold an integer are ignored.
            continue
        metrics_handler.snmp_metric.labels(name="pages_remaining", ip=ip).set(count)