Mikee
Active Member
- Jul 8, 2017
- 162
- 102
Code:
#Instagram Thief
#Scrape the top page every hour, check every photo on the top page, and compare their like counts.
#Download the photo with the most likes. Do this every hour, and also keep track of the tags that were used on it.
#Upload the photo I downloaded to my account and put the same tags on it.
#After 24 hours, record which photos got the most likes, and record their tags in a JSON file.
#Do this every day.
from bs4 import BeautifulSoup
import urllib3
class InstagramPhoto(object):
    """Fetch Instagram's explore ("top") page and keep its HTML for parsing."""

    # Decoded HTML of the most recently fetched explore page. Shared at class
    # level; stays None until get_top_page() has succeeded at least once.
    top_page_text = None

    def __init__(self):
        # Placeholder for per-photo data; nothing in this class fills it yet.
        self.data = None

    @staticmethod
    def get_top_page():
        """Download the explore page, print it, and return its decoded HTML.

        On success the decoded text is also stored on
        ``InstagramPhoto.top_page_text`` so a later parsing step can use it
        instead of only seeing it on stdout.

        Returns:
            The page source as a string, or None if fetching, decoding or
            printing failed (the error is printed rather than raised).

        NOTE(review): if the returned source lacks the photo markup, the page
        is presumably filled in client-side by JavaScript, which a plain HTTP
        GET does not execute -- confirm before writing the parser.
        """
        try:
            http = urllib3.PoolManager()
            response = http.request("GET", "https://www.instagram.com/explore/")
            page_text = response.data.decode('utf-8')
            print(page_text)
        except Exception as e:
            # Deliberately broad: this is a best-effort fetch, and the print
            # above can itself fail on consoles that cannot encode the page.
            print("\nAn Error With UrlLib3 Has Occured...\n\n", e, "\n")
            return None
        # Only cache a page that was fetched and shown successfully.
        InstagramPhoto.top_page_text = page_text
        return page_text

    # Disabled draft of the parsing step, kept for reference:
    #
    # def find_top_photo(self):
    #     try:
    #         if self.top_page_text is None:
    #             raise Exception
    #     except Exception:
    #         print("Woops, The Top Instagram Page Was Not Yet Accessed !")
    #         return
    #     text = "<div class = _mck9w _gvoze _f2mse> hey we have some text"
    #     soup = BeautifulSoup(self.top_page_text, "html.parser")
    #     print(soup.prettify())
def main():
    """Script entry point: fetch the Instagram explore page."""
    # get_top_page() prints the page itself; its result is not used here, so
    # it is no longer bound to a local that shadowed the method's name.
    InstagramPhoto.get_top_page()

    # Disabled draft of the next step, kept for reference:
    #
    # new_photo = InstagramPhoto()  # creating an instance of the new_photo that we wanna get
    # new_photo.find_top_photo()


if __name__ == "__main__":
    main()
The print doesn't output the full page source: the contents of the <body>, which is the part I need, are missing. Does anyone know how I can get around this?
Thanks.
I've also tried using the requests module, but it gives exactly the same result.