# Uptodatecrawler/Main.py

import json
import re
import sys
from urllib.parse import quote, quote_plus

import html5lib
import requests
import scrapy
from bs4 import BeautifulSoup

# UpToDate URLs used by this script.
# Search URL ending in "search="; not referenced by the code shown in this file.
up_search_url = "https://www.uptodate.com/contents/search?search="
# JSON search endpoint (language=en, max=10); the keyword is appended after "search=".
up_api_url = "https://www.uptodate.com/services/app/contents/search/2/json?&language=en&max=10&search="
# Site root; not referenced by the code shown in this file.
up_prefix_url = "https://www.uptodate.com"
# Topic endpoint; the caller appends "<processed-title>/print/json" to fetch an article.
up_content_prefix_url = "https://www.uptodate.com/services/app/contents/topic/"
# Titles of "medical" search hits, later rewritten in place to their hyphenated form.
up_result_title_list = []
# Intended for result URLs; currently never filled (the append is commented out).
up_result_url_list = []


def do_uptodate_search_with_uptodate_api(key_word, timeout=10):
    """Query the UpToDate search API for ``key_word`` and return the JSON reply.

    Args:
        key_word: Free-text search term. It is percent-encoded before being
            appended to ``up_api_url`` so that characters such as ``&``,
            ``#`` or ``+`` cannot alter the query string.
        timeout: Seconds to wait for the server. Without a timeout,
            ``requests`` may block indefinitely on an unresponsive host.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
        ValueError: If the response body is not valid JSON.
    """
    search_results = requests.get(
        up_api_url + quote_plus(key_word), timeout=timeout
    )
    # Fail with a clear HTTP error instead of an opaque JSON decoding error.
    search_results.raise_for_status()
    return search_results.json()

def uptodate_full_content_get(processed_title, timeout=10):
    """Fetch the printable JSON document for one UpToDate topic.

    Args:
        processed_title: Topic slug (see ``uptodate_title_process``). It is
            percent-encoded as a single path segment so that characters such
            as ``/``, ``?`` or ``#`` cannot change the request target.
        timeout: Seconds to wait for the server. Without a timeout,
            ``requests`` may block indefinitely on an unresponsive host.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
        ValueError: If the response body is not valid JSON.
    """
    topic_url = (
        up_content_prefix_url + quote(processed_title, safe="") + "/print/json"
    )
    full_content = requests.get(topic_url, timeout=timeout)
    # Fail with a clear HTTP error instead of an opaque JSON decoding error.
    full_content.raise_for_status()
    return full_content.json()

def uptodate_title_process(title):
    """Return ``title`` lower-cased with every space replaced by a hyphen.

    Only the plain space character is replaced; other whitespace and
    punctuation are left untouched, and runs of spaces yield runs of hyphens.
    """
    lowered = title.lower()
    return "-".join(lowered.split(" "))

def main():
    """Prompt for a keyword, list matching medical topics, print the first one.

    Reads a keyword from standard input, queries the search API, stores the
    hyphenated titles of all hits whose type is "medical" in
    ``up_result_title_list``, prints that list, then fetches and prints the
    ``printHtml`` field of the first topic. Exits with an error message when
    the search yields no medical topics.
    """
    key_word = input("Please enter your keyword: ")
    up_search_result = do_uptodate_search_with_uptodate_api(key_word)

    # Keep only "medical" hits and store each title directly in slug form.
    # NOTE: result URLs are not collected; up_result_url_list stays empty.
    up_result_title_list.extend(
        uptodate_title_process(hit["title"])
        for hit in up_search_result["data"]["searchResults"]
        if hit.get("type") == "medical"
    )

    print(up_result_title_list)

    # Without this guard an empty result set crashed with IndexError below.
    if not up_result_title_list:
        sys.exit("No medical topics found for keyword: " + key_word)

    article_full_content = uptodate_full_content_get(up_result_title_list[0])
    print(article_full_content["data"]["printHtml"])


if __name__ == '__main__':
    main()