NSICollectionPlatform/NSICollectionPlatformServer/subdomainLookup.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent


def __get_http_headers():
    """Build request headers with a randomized User-Agent, falling back to a fixed one."""
    try:
        ua = UserAgent()
        header = {
            "User-Agent": ua.random,
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,ja;q=0.7,zh-TW;q=0.6"
        }
    except Exception:
        # fake_useragent may fail to load its data; fall back to a static User-Agent
        header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36"
        }
    return header

def sub_domain_lookup(domain):
    """Query Bing for `domain` and return a dict mapping result URLs to their titles."""
    domain_list = {}
    # range(1, 2) fetches only the first results page; raise the upper bound to page further
    for n in range(1, 2):
        j = 1
        if n > 1:
            j = n * 10 - 1
        params = {
            "q": domain,
            "go": "搜索",  # the "Search" button value used by cn.bing.com
            "qs": "n",
            "form": "QBLH",
            "first": j  # offset of the first result on this page
        }
        cookies = {
            "SRCHHPGUSR": "NRSLT=50"  # ask Bing for 50 results per page
        }
        res = requests.get("https://cn.bing.com/search", headers=__get_http_headers(), params=params, cookies=cookies)
        resp = res.content
        # parse result titles with BeautifulSoup
        html = BeautifulSoup(resp, "html.parser")
        h2_list = html.find_all("h2", {"class": ""})
        for h2 in h2_list:
            if h2.find("a") is not None:
                href = h2.find("a")["href"]
                href = href.split("?")[0]  # strip the query string, keep the bare URL
                domain_list[href] = h2.find("a").text
    return domain_list
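

# Usage sketch (an assumption, not part of the original file): run the module directly
# to print every URL/title pair collected for a target domain.
if __name__ == "__main__":
    results = sub_domain_lookup("example.com")  # "example.com" is a placeholder target
    for url, title in results.items():
        print(url, "-", title)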