How Do I Get Around Instagram Blocking Web Crawlers?

Mikee

Active Member
Jul 8, 2017
162
102
Code:
#Instagram Thief
#Scrape the top page every hour, check every photo on the top page, and compare their like counts.
#Download the photo with the most likes. Do this every hour, and also keep track of the tags that were used.
#Upload the downloaded photo to my account and put the same tags on it.
#After 24 hours, record which photos got the most likes, and record their tags in a JSON file.
#Do this every day.
from bs4 import BeautifulSoup
import urllib3


class InstagramPhoto(object):
    """Download Instagram's explore page and hold data about one photo.

    Only the download step is active; the parsing step below is still a
    disabled draft.
    """

    # Decoded HTML of the most recently downloaded explore page. Stays None
    # until get_top_page() has succeeded at least once.
    top_page_text = None

    def __init__(self):
        """Create a photo record with no data attached yet."""
        self.data = None

    @staticmethod
    def get_top_page():
        """Download the explore page, print it, and return its source.

        On success the decoded page is also cached on
        ``InstagramPhoto.top_page_text`` so that later steps can parse it
        without downloading it again.

        Returns:
            The page source decoded as UTF-8, or ``None`` when the request,
            the decoding or the printing failed (the error is printed).
        """
        try:
            http = urllib3.PoolManager()
            r = http.request("GET", "https://www.instagram.com/explore/")
            page_text = r.data.decode('utf-8')
            print (page_text)
        except Exception as e:
            # Deliberately broad: this is a best-effort step, so any failure
            # is reported and signalled with None instead of crashing.
            print("\nAn Error With UrlLib3 Has Occured...\n\n",e,"\n")
            return None
        # Keep the page instead of discarding it after printing.
        InstagramPhoto.top_page_text = page_text
        return page_text

    # Disabled draft of the parsing step, kept as an inert string literal.
    '''
    def find_top_photo(self):
        try:
            if self.top_page_text is None:
                raise Exception
        except Exception:
            print("Woops, The Top Instagram Page Was Not Yet Accessed !")
            return
        text = "<div class = _mck9w _gvoze _f2mse> hey we have some text"
        soup = BeautifulSoup(self.top_page_text, "html.parser")
        print(soup.prettify())
       '''
         
     
def main():
    """Download the explore page once; get_top_page() prints it."""
    # The return value is not bound to a local because nothing here reads it.
    InstagramPhoto.get_top_page()
    '''
    new_photo = InstagramPhoto() #creating an instance of the new_photo that we wanna get
    new_photo.find_top_photo()
    '''

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()


The print doesn't show the full page source: the contents of the <body>, which is what I need, are missing. Does anyone know how I can get around this?
Thanks.

I've tried using the requests module but it literally does the same thing.
 

Users who are viewing this thread

Top