Update main.py
This commit is contained in:
parent
6a2ff0fe72
commit
3f80caccbf
1 changed file with 303 additions and 294 deletions
51
main.py
51
main.py
|
@ -21,15 +21,15 @@ import requests
|
|||
from urllib.parse import urlparse, unquote
|
||||
import os
|
||||
from bs4 import BeautifulSoup
|
||||
import logging
|
||||
# import logging
|
||||
import time
|
||||
import json
|
||||
from collections import defaultdict
|
||||
import hashlib
|
||||
|
||||
# Set up logging
|
||||
logging.basicConfig(level=logging.INFO, format='%(message)s')
|
||||
logger = logging.getLogger()
|
||||
# logging.basicConfig(level=logging.INFO, format='%(message)s')
|
||||
# logger = logging.getLogger()
|
||||
|
||||
# Global counters and error tracking
|
||||
total_urls = 0
|
||||
|
@ -151,7 +151,10 @@ def download_media(url):
|
|||
if imgur_urls:
|
||||
if isinstance(imgur_urls, list): # It's an album
|
||||
for imgur_url in imgur_urls:
|
||||
try:
|
||||
download_media(imgur_url) # Recursive call for each image in the album
|
||||
except:
|
||||
print("what the wow?")
|
||||
return
|
||||
else: # Single image/video
|
||||
direct_url = imgur_urls
|
||||
|
@ -160,7 +163,10 @@ def download_media(url):
|
|||
error_summary["Imgur URL skipped"].append(url)
|
||||
return
|
||||
elif 'tenor.com' in url:
|
||||
try:
|
||||
gif_url = get_tenor_gif_url(url)
|
||||
except:
|
||||
gif_url = ""
|
||||
if gif_url:
|
||||
direct_url = gif_url
|
||||
else:
|
||||
|
@ -208,7 +214,7 @@ def download_media(url):
|
|||
file.write(response.content)
|
||||
successful_downloads += 1
|
||||
progress = (successful_downloads + failed_downloads) / total_urls * 100
|
||||
logger.info(f"Downloaded: {filename} ({progress:.1f}% complete)")
|
||||
print(f"Downloaded: {filename} ({progress:.1f}% complete)")
|
||||
except requests.exceptions.RequestException as e:
|
||||
failed_downloads += 1
|
||||
if isinstance(e, requests.exceptions.HTTPError):
|
||||
|
@ -227,14 +233,14 @@ def read_input_file(file_path):
|
|||
if 'settings' in json_data:
|
||||
content = json_data['settings']
|
||||
else:
|
||||
logger.warning("JSON file does not contain 'settings' key. Using raw content.")
|
||||
print("JSON file does not contain 'settings' key. Using raw content.")
|
||||
except json.JSONDecodeError:
|
||||
logger.warning("Invalid JSON format. Using raw content.")
|
||||
print("Invalid JSON format. Using raw content.")
|
||||
|
||||
try:
|
||||
decoded_content = base64.b64decode(content).decode('utf-8', errors='ignore')
|
||||
except (base64.binascii.Error, UnicodeDecodeError):
|
||||
logger.warning("Content is not valid base64 or couldn't be decoded. Using raw content.")
|
||||
print("Content is not valid base64 or couldn't be decoded. Using raw content.")
|
||||
decoded_content = content
|
||||
|
||||
return decoded_content
|
||||
|
@ -244,7 +250,7 @@ def get_input_file():
|
|||
filename = f'data.{ext}'
|
||||
if os.path.exists(filename):
|
||||
return filename
|
||||
logger.error("No valid input file found. Please ensure 'data.txt' or 'data.json' exists.\nNote: If your filename is 'data.txt', only raw data from 'settings' key must be inside of it.")
|
||||
print("No valid input file found. Please ensure 'data.txt' or 'data.json' exists.\nNote: If your filename is 'data.txt', only raw data from 'settings' key must be inside of it.")
|
||||
return None
|
||||
|
||||
def main():
|
||||
|
@ -260,34 +266,37 @@ def main():
|
|||
total_urls = len(urls)
|
||||
|
||||
for url in urls:
|
||||
try:
|
||||
download_media(url)
|
||||
except:
|
||||
print("what the wow?")
|
||||
|
||||
# Print statistics
|
||||
logger.info("\n--- Download Statistics ---")
|
||||
logger.info(f"Total URLs processed: {total_urls}")
|
||||
logger.info(f"Successful downloads: {successful_downloads}")
|
||||
logger.info(f"Failed downloads: {failed_downloads}")
|
||||
logger.info(f"Success rate: {successful_downloads/total_urls*100:.1f}%")
|
||||
print("\n--- Download Statistics ---")
|
||||
print(f"Total URLs processed: {total_urls}")
|
||||
print(f"Successful downloads: {successful_downloads}")
|
||||
print(f"Failed downloads: {failed_downloads}")
|
||||
print(f"Success rate: {successful_downloads/total_urls*100:.1f}%")
|
||||
|
||||
# Print error summary
|
||||
if error_summary:
|
||||
logger.info("\n--- Error Summary ---")
|
||||
print("\n--- Error Summary ---")
|
||||
for error_type, urls in error_summary.items():
|
||||
logger.info(f"{error_type}: {len(urls)} occurences")
|
||||
print(f"{error_type}: {len(urls)} occurences")
|
||||
if error_type == "HTTP 404":
|
||||
logger.info("Sample URLs (max 5):")
|
||||
print("Sample URLs (max 5):")
|
||||
for url in urls[:5]:
|
||||
logger.info(f" - {url}")
|
||||
print(f" - {url}")
|
||||
elif len(urls) <= 5:
|
||||
for url in urls:
|
||||
logger.info(f" - {url}")
|
||||
print(f" - {url}")
|
||||
else:
|
||||
logger.info(f" (Showing first 5 of {len(urls)} errors)")
|
||||
print(f" (Showing first 5 of {len(urls)} errors)")
|
||||
for url in urls[:5]:
|
||||
logger.info(f" - {url}")
|
||||
print(f" - {url}")
|
||||
|
||||
# Pause for 10 seconds
|
||||
logger.info("\nScript finished. Exiting in 10 seconds...")
|
||||
print("\nScript finished. Exiting in 10 seconds...")
|
||||
time.sleep(10)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
Loading…
Reference in a new issue