#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Created by PyCharm.
File Name: LinuxBashShellScriptForOps:get-site-info.py
Version: 0.0.1
Author: dgden
Author Email: dgdenterprise@gmail.com
URL: https://github.com/DingGuodong/LinuxBashShellScriptForOps
Download URL: https://github.com/DingGuodong/LinuxBashShellScriptForOps/tarball/master
Create Date: 2021/4/15
Create Time: 19:47
Description: check whether the server names in an nginx config are reachable
Long Description:
References:
Prerequisites: pip install requests
               pip install bs4
Development Status: 3 - Alpha, 5 - Production/Stable
Environment: Console
Intended Audience: System Administrators, Developers, End Users/Desktop
License: Freeware, Freely Distributable
Natural Language: English, Chinese (Simplified)
Operating System: POSIX :: Linux, Microsoft :: Windows
Programming Language: Python :: 2.6
Programming Language: Python :: 2.7
Topic: Utilities
"""
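# The data file referenced in __main__ ("web-nginx-config-server-name.txt") is
# assumed to hold one server name per line, with space-separated names forming
# one group; one possible way to generate it on the server (an assumption, not
# part of the original script, adjust to your nginx layout):
#   nginx -T 2>/dev/null | awk '$1 == "server_name" {$1=""; sub(";", ""); print}' \
#       > web-nginx-config-server-name.txt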
from __future__ import print_function  # multi-argument print() on the Python 2.6/2.7 targets listed above

import requests
from bs4 import BeautifulSoup


def get_site_title_from_html(url):
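    """Fetch `url` and return a (success, detail) tuple.

    On success `detail` is the page's <title> text ("not found" when the page
    has none); on failure it is a short human-readable reason.
    """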
    # browser-like headers, kept for reference in case a site rejects non-browser clients:
    # headers = {
    #     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
    #                   'Chrome/89.0.4389.114 Safari/537.36',
    #     'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,"
    #               "application/signed-exchange;v=b3;q=0.9",
    #     'Accept-Encoding': "gzip, deflate, br",
    #     'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7,zh-TW;q=0.6,mt;q=0.5',
    # }

    headers = {
        'User-Agent': 'curl/7.55.1',
    }

    try:
        # timeout=(connect, read): allow 5s to connect and 10s to read the response
        response = requests.request("GET", url, headers=headers, allow_redirects=True, timeout=(5, 10))
    except requests.exceptions.Timeout:
        # handled before ConnectionError because ConnectTimeout inherits from both
        return False, "request timed out"
    except requests.exceptions.ConnectionError as e:
        return False, "request could not connect: " + str(e)
    except requests.exceptions.TooManyRedirects:
        # TODO(DingGuodong) set allow_redirects=False, then get next url from response.headers["Location"]
        return False, "too many redirects"
    except Exception as e:
        return False, str(e)

    if response.ok:
        response.encoding = 'utf-8'  # assume 'utf-8' pages only, deliberately avoiding `chardet`
        wanted_html = response.text
        soup = BeautifulSoup(wanted_html, 'html.parser')
        title = soup.find('title')
        if title is not None:
            title = title.get_text()
        else:
            title = "not found"
        return True, title
    else:
        return False, "request failed: HTTP " + str(response.status_code)


def parse_data_file(filename):
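    """Yield one hostname (str) or one group of hostnames (list) per data line.

    A line containing spaces is split and yielded as a list, i.e. one group.
    Assumed example layout of the data file:

        example.com
        www.example.org example.org
    """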
    sep = ' '
    with open(filename, 'r') as fp:
        for line in fp:
            line = line.strip()
            if not line:
                continue  # skip blank lines instead of yielding empty hostnames
            if sep in line:
                line = line.split(sep)
            yield line


def check_site_status(hostname):
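    """Probe a hostname (or a group of hostnames) and print the result.

    Tries 'https://' first and falls back to 'http://'; prints the hostname(s),
    "success" or "fail", and the site title (or the failure reason).
    """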
    if isinstance(hostname, list):
        # hostnames in the same group (list) are expected to share one site
        # title, so the title fetched last stands for the whole group
        hostname_list = list()
        requests_status_list = list()
        is_success = False
        site_title = "not found"
        for child in hostname:
            url = 'https://' + child
            is_success, site_title = get_site_title_from_html(url)
            if is_success:
                hostname_list.append(child)
            else:
                url = 'http://' + child
                is_success, site_title = get_site_title_from_html(url)
                if not is_success:
                    hostname_list.append(child + "(fail)")
                else:
                    hostname_list.append(child + '(http)')
            requests_status_list.append(is_success)
        print(" ".join(sorted(hostname_list)), "success" if all(requests_status_list) else 'fail', site_title)
    else:
        url = 'https://' + hostname
        is_success, site_title = get_site_title_from_html(url)
        if is_success:
            print(hostname, "success", site_title)
        else:
            url = 'http://' + hostname
            is_success, site_title = get_site_title_from_html(url)
            if is_success:
                print(hostname + '(http)', 'success', site_title)
            else:
                print(hostname, "fail", site_title)  # `site_title` carries the failure reason


if __name__ == '__main__':
    # for item in parse_data_file("web-nginx-config-server-name.txt"):
    #     check_site_status(item)

    check_site_status("github.com")
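    # a group of names expected to share one site title could be checked like
    # this (illustrative example, not part of the original script):
    # check_site_status(["github.com", "www.github.com"])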