Skip to content

Commit ef7bca6

Browse files
committed
check if server name in nginx config can be reached
1 parent 55a2279 commit ef7bca6

File tree

1 file changed

+123
-0
lines changed

1 file changed

+123
-0
lines changed
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#!/usr/bin/python
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created by PyCharm.
5+
File Name: LinuxBashShellScriptForOps:get-site-info.py
6+
Version: 0.0.1
7+
Author: dgden
8+
Author Email: dgdenterprise@gmail.com
9+
URL: https://github.com/DingGuodong/LinuxBashShellScriptForOps
10+
Download URL: https://github.com/DingGuodong/LinuxBashShellScriptForOps/tarball/master
11+
Create Date: 2021/4/15
12+
Create Time: 19:47
13+
Description: check if server name in nginx config can be reached
14+
Long Description:
15+
References:
16+
Prerequisites: pip install requests
17+
pip install bs4
18+
Development Status: 3 - Alpha, 5 - Production/Stable
19+
Environment: Console
20+
Intended Audience: System Administrators, Developers, End Users/Desktop
21+
License: Freeware, Freely Distributable
22+
Natural Language: English, Chinese (Simplified)
23+
Operating System: POSIX :: Linux, Microsoft :: Windows
24+
Programming Language: Python :: 2.6
25+
Programming Language: Python :: 2.7
26+
Topic: Utilities
27+
"""
28+
import requests
29+
from bs4 import BeautifulSoup
30+
31+
32+
def get_site_title_from_html(url):
    """Fetch ``url`` and return the text of its HTML ``<title>`` element.

    The page is requested with a curl-like ``User-Agent``, redirects are
    followed, and the request uses a ``(5, 10)`` timeout (connect, read).

    :param url: absolute URL including the scheme, e.g. ``https://example.com``
    :return: an ``(is_success, detail)`` tuple. On success ``detail`` is the
        page title with runs of whitespace collapsed to single spaces, or
        ``"not found"`` when the document has no ``<title>`` element. On
        failure ``detail`` is a short human-readable reason; when the server
        answered with an error status, the status code is part of the reason.
    """
    headers = {
        'User-Agent': 'curl/7.55.1',
    }

    try:
        response = requests.request("GET", url, headers=headers, allow_redirects=True, timeout=(5, 10))
    except requests.exceptions.ConnectionError as e:
        return False, "requests can not reached. " + str(e)
    except requests.exceptions.Timeout:
        return False, "requests timeout"
    except requests.exceptions.TooManyRedirects:
        # TODO(DingGuodong) set allow_redirects=False, then get next url from response.headers["Location"]
        return False, "too many redirects"
    except Exception as e:
        # Best effort: any other failure is reported as the reason instead of
        # aborting the caller's run over the remaining hosts.
        return False, str(e)

    if not response.ok:
        # Include the status code so the caller's report shows why it failed.
        return False, "requests fail, status code: " + str(response.status_code)

    response.encoding = 'utf-8'  # support 'utf-8' only, do not use `chardet`
    soup = BeautifulSoup(response.text, 'html.parser')
    title = soup.find('title')
    if title is None:
        return True, "not found"

    # Titles often span several lines in the markup; collapse all whitespace
    # so the value stays on a single line when printed by the caller.
    return True, " ".join(title.get_text().split())
71+
72+
73+
def parse_data_file(filename):
    """Yield the host name entries stored in ``filename``, one per line.

    Each non-blank line holds one or more whitespace-separated host names.
    Blank (or whitespace-only) lines are skipped.

    :param filename: path of the text file to read
    :return: generator yielding a ``str`` for a line with a single name and a
        ``list`` of ``str`` for a line with several names
    """
    with open(filename, 'r') as fp:
        # Iterate the file object directly instead of loading every line up
        # front with readlines().
        for line in fp:
            # split() without arguments drops leading/trailing whitespace and
            # collapses runs of whitespace, so repeated separators never
            # produce empty host names.
            names = line.split()
            if not names:
                continue  # blank line: nothing to check
            if len(names) == 1:
                yield names[0]
            else:
                yield names
81+
82+
83+
def _probe_hostname(hostname):
    """Probe one host over https, falling back to plain http.

    :param hostname: host name without a scheme
    :return: a ``(label, is_success, detail)`` tuple. ``label`` is the host
        name, suffixed with ``(http)`` when only the http request succeeded.
        ``detail`` is the site title on success, otherwise the failure reason
        of the http attempt.
    """
    is_success, detail = get_site_title_from_html('https://' + hostname)
    if is_success:
        return hostname, True, detail

    is_success, detail = get_site_title_from_html('http://' + hostname)
    if is_success:
        return hostname + '(http)', True, detail
    return hostname, False, detail


def check_site_status(hostname):
    """Check whether one host, or a group of hosts, can be reached and print a report line.

    The line has the form ``<host(s)> <success|fail> <detail>``. A host that
    answers only over http is marked ``(http)``; inside a group an
    unreachable host is marked ``(fail)``.

    :param hostname: a single host name (``str``), or a ``list``/``tuple`` of
        host names that are reported together on one line. For a group,
        ``detail`` is the title of the last probed host when every host
        succeeded, otherwise the reason of the most recent failure.
    :return: None; the result is printed to stdout
    """
    if isinstance(hostname, (list, tuple)):
        if not hostname:
            # Nothing to probe; all() over no results would report "success".
            return

        # all sites use same site title when they are same group(list)
        labels = []
        all_ok = True
        site_title = "not found"
        failure_reason = None
        for child in hostname:
            label, is_success, detail = _probe_hostname(child)
            if is_success:
                labels.append(label)
                site_title = detail
            else:
                labels.append(label + "(fail)")
                all_ok = False
                # Keep the reason so a later successful host cannot hide it.
                failure_reason = detail

        status = "success" if all_ok else "fail"
        detail = site_title if all_ok else failure_reason
        # Build one string so the output is the same whether print is the
        # Python 2 statement or the Python 3 function.
        print(" ".join([" ".join(sorted(labels)), status, detail]))
    else:
        label, is_success, detail = _probe_hostname(hostname)
        # On failure `detail` contains the reason.
        print(" ".join([label, "success" if is_success else "fail", detail]))
117+
118+
119+
if __name__ == '__main__':
    # Script entry point: run a single check against one well-known host.
    #
    # To check every entry of a server-name list instead:
    #     for item in parse_data_file("web-nginx-config-server-name.txt"):
    #         check_site_status(item)
    demo_target = "github.com"
    check_site_status(demo_target)

0 commit comments

Comments
 (0)