Uptodatecrawler/Main.py

37 lines
1.0 KiB
Python
Raw Normal View History

2018-08-10 18:13:15 +08:00
import re
import sys
from urllib.parse import quote_plus

import html5lib
import requests
import scrapy
from bs4 import BeautifulSoup
from selenium import webdriver
# Base URL of the UpToDate search page; the search term is appended as the
# value of the ``search`` query parameter.
up_search_url = "https://www.uptodate.com/contents/search?search="
def do_uptodate_search_with_gecko(key_word):
    """Search UpToDate for *key_word* in Firefox and print the result nodes.

    The search URL is printed, the page is loaded in a Selenium-driven
    Firefox session, and every element matching the CSS selector
    ``.search-results`` in the resulting HTML is printed.

    Args:
        key_word: Search term typed by the user; it is URL-encoded before
            being appended to ``up_search_url``.

    Returns:
        None. All output goes to stdout.
    """
    # Encode the term so spaces, '&', '#', etc. cannot break the query string.
    search_url = up_search_url + quote_plus(key_word)
    print(search_url)

    driver = webdriver.Firefox()
    try:
        driver.get(search_url)
        # NOTE(review): if the results are rendered by JavaScript after the
        # initial load, page_source may be captured before they exist and an
        # explicit wait would be needed here -- confirm against the live page.
        html = driver.page_source
    finally:
        # Always shut the browser down, even if loading the page fails;
        # otherwise every call leaks a Firefox process.
        driver.quit()

    soup = BeautifulSoup(html, 'html.parser')
    # find_all() interprets its argument as a tag *name*, so passing
    # ".search-results" never matched anything. select() takes CSS selectors.
    articles_links = soup.select(".search-results")
    for links in articles_links:
        print(links)
def do_uptodate_search(key_word):
    """Search UpToDate for *key_word*, dump the page, and print each result.

    The search URL is printed, the page is loaded in a Selenium-driven
    Firefox session, the raw page HTML is printed, and then every element
    matching the CSS selector ``div.search-result`` is printed.

    Args:
        key_word: Search term typed by the user; it is URL-encoded before
            being appended to ``up_search_url``.

    Returns:
        None. All output goes to stdout.
    """
    # Encode the term so spaces, '&', '#', etc. cannot break the query string.
    search_url = up_search_url + quote_plus(key_word)
    print(search_url)

    driver = webdriver.Firefox()
    try:
        driver.get(search_url)
        html = driver.page_source
    finally:
        # Always shut the browser down, even if loading the page fails;
        # otherwise every call leaks a Firefox process.
        driver.quit()

    soup = BeautifulSoup(html, 'html.parser')
    print(html)
    # find_all() interprets its argument as a tag *name*, so passing
    # "div.search-result" never matched anything. select() takes CSS selectors.
    articles_links = soup.select("div.search-result")
    for link in articles_links:
        print(link)
if __name__ == '__main__':
    # Script entry point: prompt for a search term on stdin and hand it
    # straight to the Firefox-driven search.
    do_uptodate_search_with_gecko(input("Please enter your keyword: "))