Update main.py

This commit is contained in:
Arma-Damna-Dillo 2025-01-16 02:31:55 +00:00
parent 6a2ff0fe72
commit 3f80caccbf

51
main.py
View file

@ -21,15 +21,15 @@ import requests
from urllib.parse import urlparse, unquote
import os
from bs4 import BeautifulSoup
import logging
# import logging
import time
import json
from collections import defaultdict
import hashlib
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(message)s')
logger = logging.getLogger()
# logging.basicConfig(level=logging.INFO, format='%(message)s')
# logger = logging.getLogger()
# Global counters and error tracking
total_urls = 0
@ -151,7 +151,10 @@ def download_media(url):
if imgur_urls:
if isinstance(imgur_urls, list): # It's an album
for imgur_url in imgur_urls:
try:
download_media(imgur_url) # Recursive call for each image in the album
except:
print("what the wow?")
return
else: # Single image/video
direct_url = imgur_urls
@ -160,7 +163,10 @@ def download_media(url):
error_summary["Imgur URL skipped"].append(url)
return
elif 'tenor.com' in url:
try:
gif_url = get_tenor_gif_url(url)
except:
gif_url = ""
if gif_url:
direct_url = gif_url
else:
@ -208,7 +214,7 @@ def download_media(url):
file.write(response.content)
successful_downloads += 1
progress = (successful_downloads + failed_downloads) / total_urls * 100
logger.info(f"Downloaded: {filename} ({progress:.1f}% complete)")
print(f"Downloaded: {filename} ({progress:.1f}% complete)")
except requests.exceptions.RequestException as e:
failed_downloads += 1
if isinstance(e, requests.exceptions.HTTPError):
@ -227,14 +233,14 @@ def read_input_file(file_path):
if 'settings' in json_data:
content = json_data['settings']
else:
logger.warning("JSON file does not contain 'settings' key. Using raw content.")
print("JSON file does not contain 'settings' key. Using raw content.")
except json.JSONDecodeError:
logger.warning("Invalid JSON format. Using raw content.")
print("Invalid JSON format. Using raw content.")
try:
decoded_content = base64.b64decode(content).decode('utf-8', errors='ignore')
except (base64.binascii.Error, UnicodeDecodeError):
logger.warning("Content is not valid base64 or couldn't be decoded. Using raw content.")
print("Content is not valid base64 or couldn't be decoded. Using raw content.")
decoded_content = content
return decoded_content
@ -244,7 +250,7 @@ def get_input_file():
filename = f'data.{ext}'
if os.path.exists(filename):
return filename
logger.error("No valid input file found. Please ensure 'data.txt' or 'data.json' exists.\nNote: If your filename is 'data.txt', only raw data from 'settings' key must be inside of it.")
print("No valid input file found. Please ensure 'data.txt' or 'data.json' exists.\nNote: If your filename is 'data.txt', only raw data from 'settings' key must be inside of it.")
return None
def main():
@ -260,34 +266,37 @@ def main():
total_urls = len(urls)
for url in urls:
try:
download_media(url)
except:
print("what the wow?")
# Print statistics
logger.info("\n--- Download Statistics ---")
logger.info(f"Total URLs processed: {total_urls}")
logger.info(f"Successful downloads: {successful_downloads}")
logger.info(f"Failed downloads: {failed_downloads}")
logger.info(f"Success rate: {successful_downloads/total_urls*100:.1f}%")
print("\n--- Download Statistics ---")
print(f"Total URLs processed: {total_urls}")
print(f"Successful downloads: {successful_downloads}")
print(f"Failed downloads: {failed_downloads}")
print(f"Success rate: {successful_downloads/total_urls*100:.1f}%")
# Print error summary
if error_summary:
logger.info("\n--- Error Summary ---")
print("\n--- Error Summary ---")
for error_type, urls in error_summary.items():
logger.info(f"{error_type}: {len(urls)} occurences")
print(f"{error_type}: {len(urls)} occurences")
if error_type == "HTTP 404":
logger.info("Sample URLs (max 5):")
print("Sample URLs (max 5):")
for url in urls[:5]:
logger.info(f" - {url}")
print(f" - {url}")
elif len(urls) <= 5:
for url in urls:
logger.info(f" - {url}")
print(f" - {url}")
else:
logger.info(f" (Showing first 5 of {len(urls)} errors)")
print(f" (Showing first 5 of {len(urls)} errors)")
for url in urls[:5]:
logger.info(f" - {url}")
print(f" - {url}")
# Pause for 10 seconds
logger.info("\nScript finished. Exiting in 10 seconds...")
print("\nScript finished. Exiting in 10 seconds...")
time.sleep(10)
if __name__ == "__main__":