2018-08-10 18:13:15 +08:00
|
|
|
import scrapy
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
import requests
|
|
|
|
from selenium import webdriver
|
|
|
|
import html5lib
|
|
|
|
import re
|
|
|
|
import sys
|
|
|
|
|
2018-08-10 21:11:35 +08:00
|
|
|
# UpToDate endpoints. The two "...search=" URLs are meant to have the
# search keyword appended directly after the trailing "=".
up_prefix_url = "https://www.uptodate.com"
up_search_url = up_prefix_url + "/contents/search?search="
up_api_url = (
    up_prefix_url
    + "/services/app/contents/search/2/json?&language=en&max=10&search="
)
|
2018-08-10 18:13:15 +08:00
|
|
|
|
|
|
|
def do_uptodate_search_with_gecko(key_word):
    """Search UpToDate for *key_word* in Firefox and print the result nodes.

    Starts a Firefox session through Selenium, loads the search page, parses
    the page source with BeautifulSoup and prints every element that matches
    the ``.search-results`` CSS selector.

    Args:
        key_word: Search term. It is percent-encoded before being appended
            to ``up_search_url`` so that characters such as ``&``, ``#`` or
            spaces cannot corrupt the query string.

    Returns:
        None. The search URL and the matched elements are printed.
    """
    # Function-scope import keeps the block self-contained.
    from urllib.parse import quote

    search_url = up_search_url + quote(key_word, safe="")
    print(search_url)

    driver = webdriver.Firefox()
    try:
        driver.get(search_url)
        # NOTE(review): the page source is read right after get(); if the
        # results are rendered asynchronously an explicit wait may be
        # needed here - confirm against the live page.
        html = driver.page_source
    finally:
        # Always close the browser, even when navigation fails, so no
        # Firefox/geckodriver process is left behind.
        driver.quit()

    soup = BeautifulSoup(html, 'html.parser')
    # find_all(".search-results") looked for a *tag* literally named
    # ".search-results" and therefore never matched; select() interprets
    # the string as a CSS selector.
    for link in soup.select(".search-results"):
        print(link)
|
|
|
|
|
2018-08-10 21:11:35 +08:00
|
|
|
def do_uptodate_search_with_headless(key_word):
    """Search UpToDate for *key_word* in headless Chrome and print the hits.

    Starts Chrome with the ``headless`` argument through Selenium, loads the
    search page, parses the page source with BeautifulSoup and prints the
    list of elements matching ``#search-results-container``.

    Args:
        key_word: Search term. It is percent-encoded before being appended
            to ``up_search_url``.

    Returns:
        None. The search URL and the matched elements are printed.
    """
    # Function-scope import keeps the block self-contained.
    from urllib.parse import quote

    search_url = up_search_url + quote(key_word, safe="")
    print(search_url)

    option = webdriver.ChromeOptions()
    option.add_argument('headless')
    # "options=" replaces the deprecated "chrome_options=" keyword, which
    # newer Selenium releases no longer accept.
    driver = webdriver.Chrome(options=option)
    try:
        driver.get(search_url)
        html = driver.page_source
    finally:
        # Always shut Chrome down so no headless process is leaked,
        # even if loading the page raises.
        driver.quit()

    soup = BeautifulSoup(html, 'html.parser')
    articles_links = soup.select("#search-results-container")
    print(articles_links)
|
2018-08-10 18:13:15 +08:00
|
|
|
|
2018-08-10 21:11:35 +08:00
|
|
|
def do_uptodate_search_with_uptodate_api(key_word):
    """Query the UpToDate JSON search endpoint and print the decoded reply.

    Args:
        key_word: Search term. It is percent-encoded before being appended
            to ``up_api_url``.

    Returns:
        None. The decoded JSON payload is printed.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            the timeout.
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status.
        ValueError: If the response body is not valid JSON.
    """
    # Function-scope import keeps the block self-contained.
    from urllib.parse import quote

    # Without a timeout requests waits indefinitely on a stalled connection.
    search_results = requests.get(
        up_api_url + quote(key_word, safe=""),
        timeout=30,
    )
    # Surface HTTP failures directly instead of as a confusing JSON
    # decoding error on an HTML error page.
    search_results.raise_for_status()
    print(search_results.json())
|
2018-08-10 18:13:15 +08:00
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: prompt for a keyword and run the JSON API search.
    key_word = input("Please enter your keyword: ").strip()
    if not key_word:
        # An empty keyword would only produce a meaningless "search="
        # request, so stop early with a message and a non-zero exit code.
        sys.exit("No keyword entered; nothing to search for.")
    do_uptodate_search_with_uptodate_api(key_word)
|