From d94e3a0c995508a271e6f6e5890b6d60ccc7a631 Mon Sep 17 00:00:00 2001 From: Nora Goodman Date: Mon, 26 May 2025 18:36:11 -0400 Subject: [PATCH 1/4] Include scraper.py --- .gitignore | 3 ++- scraper/scraper.py | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 scraper/scraper.py diff --git a/.gitignore b/.gitignore index 43271eb..c01dab5 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ scraper/** !scraper/*.py .venv/ __pycache__/ -static/ \ No newline at end of file +static/ +posts/ \ No newline at end of file diff --git a/scraper/scraper.py b/scraper/scraper.py new file mode 100644 index 0000000..ceb6d17 --- /dev/null +++ b/scraper/scraper.py @@ -0,0 +1,18 @@ +# wait between 1050 and 1150 seconds +import instaloader +from time import sleep +from random import randint +from os import listdir + +L = instaloader.Instaloader() + +L.load_session_from_file("down.loader54321") + +profile_of_interest = instaloader.Profile.from_username(L.context, "rocfnb") +files = listdir('.') +for post in profile_of_interest.get_posts(): + if not post.shortcode in files : + L.download_post(post, target=post.shortcode) + sleep(randint(1050, 1150)) + else: + print("Already downloaded " + post.shortcode +", skipping...") \ No newline at end of file From 82611c358fa3894f121897edd7e3e43c5c8f2773 Mon Sep 17 00:00:00 2001 From: Nora Goodman Date: Tue, 27 May 2025 16:11:56 -0400 Subject: [PATCH 2/4] Update list of links --- templates/index.html | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/templates/index.html b/templates/index.html index 9400839..84fb92f 100644 --- a/templates/index.html +++ b/templates/index.html @@ -12,16 +12,16 @@ Solidarity not charity: From Rochester, New York.

Quick resources

From f170d2ca9036bf67a467fd59045f14fb3a461dcd Mon Sep 17 00:00:00 2001 From: Nora Goodman Date: Tue, 27 May 2025 16:21:15 -0400 Subject: [PATCH 3/4] Fix some directory issues --- style.css => assets/style.css | 0 builder.py | 12 ++++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) rename style.css => assets/style.css (100%) diff --git a/style.css b/assets/style.css similarity index 100% rename from style.css rename to assets/style.css diff --git a/builder.py b/builder.py index d1b4909..ffcb0a1 100644 --- a/builder.py +++ b/builder.py @@ -5,6 +5,7 @@ import re from pytz import timezone from datetime import datetime import bisect +import shutil from jinja2 import Environment, PackageLoader, select_autoescape scraper_path = 'scraper/' @@ -60,13 +61,16 @@ for folder in os.listdir(scraper_path): post['caption'] = file.read() if ('timestamp' in post.keys()): - with open(output_path + 'posts/' + str(post['timestamp']) +'.html', 'w+') as output_file: + with open('posts/' + str(post['timestamp']) +'.html', 'w+') as output_file: output_file.write(post_template.render(post=post)) homepage_template = env.get_template("index.html") -with open(output_path + 'index.html', "w+") as output_file: +with open(output_path + '/index.html', "w+") as output_file: output_file.write(homepage_template.render()) -with open(output_path + 'blog.html', "w+") as output_file: - output_file.write(blog_template.render(posts=posts)) \ No newline at end of file +with open(output_path + '/blog.html', "w+") as output_file: + output_file.write(blog_template.render(posts=posts)) + +# copy all assets to be in the output +shutil.copytree('assets/', 'static/', dirs_exist_ok=True) \ No newline at end of file From a554893b3b5244bd56a6a56f30e9863f664f5925 Mon Sep 17 00:00:00 2001 From: Nora Goodman Date: Tue, 27 May 2025 16:24:31 -0400 Subject: [PATCH 4/4] Remove link to blog (for now) --- templates/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/index.html b/templates/index.html index 84fb92f..f2ddb84 100644 --- a/templates/index.html +++ b/templates/index.html @@ -49,5 +49,5 @@ Solidarity not charity: From Rochester, New York.

-

See what we're up to :-)

+ {% endblock %} \ No newline at end of file