2018-08-10 18:13:15 +08:00
|
|
|
import scrapy
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
import requests
|
|
|
|
from selenium import webdriver
|
|
|
|
import html5lib
|
|
|
|
import re
|
|
|
|
import sys
|
|
|
|
|
|
|
|
|
|
|
|
# Base URL of the UpToDate search page; the search functions in this file
# append the keyword directly after the trailing "search=" query parameter.
up_search_url = "https://www.uptodate.com/contents/search?search="
|
|
|
|
|
|
|
|
def do_uptodate_search_with_gecko(key_word):
    """Search UpToDate for *key_word* in Firefox and print the matching nodes.

    Opens a Firefox WebDriver session, loads the search URL built from
    ``up_search_url`` plus the keyword, parses the page source with
    BeautifulSoup and prints every element matching the CSS selector
    ``.search-results``.

    Args:
        key_word: Raw search phrase. It is percent-encoded before being
            appended to the search URL.

    Returns:
        None. The URL and the matched elements are written to stdout.
    """
    # Function-scope import so the block does not depend on a new
    # top-of-file import.
    from urllib.parse import quote

    # Percent-encode the phrase so spaces, '&', '#', etc. stay inside the
    # value of the "search" query parameter instead of breaking the URL.
    search_url = up_search_url + quote(key_word, safe="")
    print(search_url)

    driver = webdriver.Firefox()
    try:
        driver.get(search_url)
        html = driver.page_source
    finally:
        # Always shut the browser down, even if loading the page fails;
        # otherwise every call leaves a Firefox process behind.
        driver.quit()

    soup = BeautifulSoup(html, 'html.parser')

    # find_all() interprets its first argument as a tag *name*, so passing
    # ".search-results" could never match anything. select() is the
    # BeautifulSoup API that understands CSS selectors.
    # NOTE(review): if the result list is rendered client-side after the
    # initial load, page_source may still be captured too early -- confirm
    # against the live page and add an explicit wait if needed.
    articles_links = soup.select(".search-results")

    for links in articles_links:
        print(links)
|
|
|
|
|
|
|
|
def do_uptodate_search(key_word):
    """Search UpToDate for *key_word*, dump the page and print result nodes.

    Opens a Firefox WebDriver session, loads the search URL built from
    ``up_search_url`` plus the keyword, prints the raw page source, then
    prints every element matching the CSS selector ``div.search-result``.

    Args:
        key_word: Raw search phrase. It is percent-encoded before being
            appended to the search URL.

    Returns:
        None. The URL, the page source and the matched elements are
        written to stdout.
    """
    # Function-scope import so the block does not depend on a new
    # top-of-file import.
    from urllib.parse import quote

    # Percent-encode the phrase so spaces, '&', '#', etc. stay inside the
    # value of the "search" query parameter instead of breaking the URL.
    search_url = up_search_url + quote(key_word, safe="")
    print(search_url)

    driver = webdriver.Firefox()
    try:
        driver.get(search_url)
        html = driver.page_source
    finally:
        # Always shut the browser down, even if loading the page fails;
        # otherwise every call leaves a Firefox process behind.
        driver.quit()

    soup = BeautifulSoup(html, 'html.parser')

    # Debug dump of the full page source, kept from the original.
    print(html)

    # find_all() interprets its first argument as a tag *name*, so passing
    # "div.search-result" could never match anything. select() is the
    # BeautifulSoup API that understands CSS selectors.
    # NOTE(review): if the result list is rendered client-side after the
    # initial load, page_source may still be captured too early -- confirm
    # against the live page and add an explicit wait if needed.
    articles_links = soup.select("div.search-result")

    for link in articles_links:
        print(link)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: prompt for a search phrase on stdin and hand it
    # straight to the Firefox-driven search.
    do_uptodate_search_with_gecko(input("Please enter your keyword: "))
|