From 404e89508dbbe5c4ecbb1620cf3e830bbc12e256 Mon Sep 17 00:00:00 2001
From: m3v113 <mehvishbondre@gmail.com>
Date: Mon, 5 Jan 2026 19:53:59 -0800
Subject: [PATCH 1/5] add HTML scraping for printer ink level and pages remaining

---
 printer/modules/collector.py | 34 ++++++++++++++++++++++++++++++++++
 printer/requirements.txt     |  1 +
 2 files changed, 35 insertions(+)

diff --git a/printer/modules/collector.py b/printer/modules/collector.py
index f7c336fe..33afeb16 100644
--- a/printer/modules/collector.py
+++ b/printer/modules/collector.py
@@ -5,6 +5,11 @@
 
 from pysnmp.hlapi import *
 
+# imports for parsing html
+import requests
+import bs4
+from bs4 import BeautifulSoup
+
 from modules.metrics import MetricsHandler
 
 metrics_handler = MetricsHandler.instance()
@@ -62,11 +67,40 @@ def fetch_ips_from_config(config_file_path):
     except Exception:
         logging.exception(f"error opening config file")
 
+def scrape_html(ip):
+
+    url = "http://" + ip + "/"
+
+    try:
+        page = requests.get(url, timeout=5)
+        page.raise_for_status()
+    except Exception:
+        logging.exception("failed to fetch printer html page")
+        return
+
+    soup = BeautifulSoup(page.content, 'html.parser')
+
+    content = soup.find_all('td')
+    text = "%"
+    ink_level = ""
+    for element in content:
+        if text in str(element.string):
+            ink_level = float((element.text.strip()).rstrip('%'))
+            metrics_handler.snmp_metric.labels(name="ink_level", ip=ip).set(ink_level)
+    content = soup.find_all('td', class_='tableDataCellStand width30')
+    pages_remaining = 0
+    for element in content:
+        try:
+            pages_remaining = int(element.text.strip())
+            metrics_handler.snmp_metric.labels(name="pages_remaining", ip=ip).set(pages_remaining)
+        except Exception:
+            pass
 
 def scrape_snmp(ip_list, sleep_duration_minutes=5):
     while True:
         for ip in ip_list:
             get_snmp_data(ip)
+            scrape_html(ip)
         time.sleep(sleep_duration_minutes * 60)
 
 
diff --git a/printer/requirements.txt b/printer/requirements.txt
index 56bd4bf9..08c3b73b 100644
--- a/printer/requirements.txt
+++ b/printer/requirements.txt
@@ -6,3 +6,4 @@ httpx==0.28.1
 requests==2.32.3
 pysnmp==4.4.12
 pyasn1==0.4.8
+bs4==0.0.2
\ No newline at end of file

From 44fe8c96f8a7c9a97ccca393c37d41a3dca29f50 Mon Sep 17 00:00:00 2001
From: m3v113 <mehvishbondre@gmail.com>
Date: Mon, 5 Jan 2026 21:11:55 -0800
Subject: [PATCH 2/5] remove INK_LEVEL SNMP OID, replaced by HTML scraping

---
 printer/modules/collector.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/printer/modules/collector.py b/printer/modules/collector.py
index 33afeb16..4800a2fe 100644
--- a/printer/modules/collector.py
+++ b/printer/modules/collector.py
@@ -24,7 +24,6 @@
 
 
 class SnmpOid(enum.Enum):
-    INK_LEVEL = ("ink_level", "1.3.6.1.2.1.43.11.1.1.9.1.1")
     INK_CAPACITY = ("ink_capacity", "1.3.6.1.2.1.43.11.1.1.8.1.1")
     PAGE_COUNT = ("page_count", "1.3.6.1.2.1.43.10.2.1.4.1.1")
     TRAY_EMPTY = ("tray_empty", "1.3.6.1.2.1.43.18.1.1.8.1.13", True)

From 363287d6ff5d41a39095f0b0f364fe1715249dcc Mon Sep 17 00:00:00 2001
From: m3v113 <76668066+m3v113@users.noreply.github.com>
Date: Mon, 5 Jan 2026 21:02:56 -0800
Subject: [PATCH 3/5] catch ValueError instead of broad Exception in scrape_html

Co-authored-by: Evan Ugarte <36345325+evanugarte@users.noreply.github.com>
---
 printer/modules/collector.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/printer/modules/collector.py b/printer/modules/collector.py
index 4800a2fe..54143675 100644
--- a/printer/modules/collector.py
+++ b/printer/modules/collector.py
@@ -92,8 +92,8 @@ def scrape_html(ip):
         try:
             pages_remaining = int(element.text.strip())
             metrics_handler.snmp_metric.labels(name="pages_remaining", ip=ip).set(pages_remaining)
-        except Exception:
-            pass
+        except ValueError:
+            continue
 
 def scrape_snmp(ip_list, sleep_duration_minutes=5):
     while True:

From bc2ba0ecd86fd90b6b597a12d777c3798af54c3e Mon Sep 17 00:00:00 2001
From: evan <evanuxd@gmail.com>
Date: Sat, 10 Jan 2026 22:02:46 -0800
Subject: [PATCH 4/5] clean up imports, add trailing newline to requirements.txt

---
 printer/modules/collector.py | 5 +----
 printer/requirements.txt     | 2 +-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/printer/modules/collector.py b/printer/modules/collector.py
index 54143675..d3096c7f 100644
--- a/printer/modules/collector.py
+++ b/printer/modules/collector.py
@@ -3,12 +3,9 @@
 import logging
 import json
 
+from bs4 import BeautifulSoup
 from pysnmp.hlapi import *
-
-# imports for parsing html
 import requests
-import bs4
-from bs4 import BeautifulSoup
 
 from modules.metrics import MetricsHandler
 
diff --git a/printer/requirements.txt b/printer/requirements.txt
index 08c3b73b..5af47a40 100644
--- a/printer/requirements.txt
+++ b/printer/requirements.txt
@@ -6,4 +6,4 @@ httpx==0.28.1
 requests==2.32.3
 pysnmp==4.4.12
 pyasn1==0.4.8
-bs4==0.0.2
\ No newline at end of file
+bs4==0.0.2

From a6f13506d360c3c2d96760094824652847522771 Mon Sep 17 00:00:00 2001
From: evan <evanuxd@gmail.com>
Date: Sat, 10 Jan 2026 22:07:33 -0800
Subject: [PATCH 5/5] refactor scrape_html: catch RequestException, simplify parsing

---
 printer/modules/collector.py | 42 ++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/printer/modules/collector.py b/printer/modules/collector.py
index d3096c7f..3b66100a 100644
--- a/printer/modules/collector.py
+++ b/printer/modules/collector.py
@@ -63,32 +63,32 @@ def fetch_ips_from_config(config_file_path):
     except Exception:
         logging.exception(f"error opening config file")
 
-def scrape_html(ip):
-
-    url = "http://" + ip + "/"
-
+def scrape_html(ip: str):
+    url = f"http://{ip}/"
+    
     try:
-        page = requests.get(url, timeout=5)
-        page.raise_for_status()
-    except Exception:
+        response = requests.get(url, timeout=5)
+        response.raise_for_status()
+    except requests.RequestException:
         logging.exception("failed to fetch printer html page")
         return
 
-    soup = BeautifulSoup(page.content, 'html.parser')
-
-    content = soup.find_all('td')
-    text = "%"
-    ink_level = ""
-    for element in content:
-        if text in str(element.string):
-            ink_level = float((element.text.strip()).rstrip('%'))
-            metrics_handler.snmp_metric.labels(name="ink_level", ip=ip).set(ink_level)
-    content = soup.find_all('td', class_='tableDataCellStand width30')
-    pages_remaining = 0
-    for element in content:
+    soup = BeautifulSoup(response.content, 'html.parser')
+
+    ink_td = soup.find('td', string=lambda s: s and '%' in s)
+    if ink_td:
+        try:
+            # Clean: strip whitespace, remove %, convert to float
+            level = float(ink_td.text.strip().rstrip('%'))
+            metrics_handler.snmp_metric.labels(name="ink_level", ip=ip).set(level)
+        except ValueError:
+            logging.warning(f"Could not parse ink level from: {ink_td.text}")
+
+    page_cells = soup.select('td.tableDataCellStand.width30')
+    for cell in page_cells:
         try:
-            pages_remaining = int(element.text.strip())
-            metrics_handler.snmp_metric.labels(name="pages_remaining", ip=ip).set(pages_remaining)
+            count = int(cell.text.strip())
+            metrics_handler.snmp_metric.labels(name="pages_remaining", ip=ip).set(count)
         except ValueError:
             continue