Add files via upload
This commit is contained in:
parent
3323b570f4
commit
3a4a642921
2 changed files with 181 additions and 0 deletions
178
main.py
Normal file
178
main.py
Normal file
|
@ -0,0 +1,178 @@
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# Author: nloginov
|
||||||
|
# Script Name: Discord Favorite Gif Downloader
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import re
|
||||||
|
import requests
|
||||||
|
from urllib.parse import urlparse, unquote
|
||||||
|
import os
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Set up logging
|
||||||
|
logging.basicConfig(level=logging.INFO, format='%(message)s')
|
||||||
|
logger = logging.getLogger()
|
||||||
|
|
||||||
|
# Global counters
|
||||||
|
total_urls = 0
|
||||||
|
successful_downloads = 0
|
||||||
|
failed_downloads = 0
|
||||||
|
|
||||||
|
def ensure_directory(directory):
|
||||||
|
if not os.path.exists(directory):
|
||||||
|
os.makedirs(directory)
|
||||||
|
|
||||||
|
def extract_and_fix_urls(text):
|
||||||
|
pattern = r'https?:?/+[a-zA-Z0-9\-._~:/?#[\]@!$&\'()*+,;=%]+'
|
||||||
|
urls = re.findall(pattern, text)
|
||||||
|
|
||||||
|
fixed_urls = []
|
||||||
|
for url in urls:
|
||||||
|
if url.startswith('http/'):
|
||||||
|
url = 'http://' + url[5:]
|
||||||
|
elif url.startswith('https/'):
|
||||||
|
url = 'https://' + url[6:]
|
||||||
|
|
||||||
|
url = re.sub(r'^(https?:)/+', r'\1//', url)
|
||||||
|
|
||||||
|
if 'discordapp.net/external/' in url:
|
||||||
|
parsed = urlparse(url)
|
||||||
|
query = parsed.path.split('/')[-1]
|
||||||
|
if query.startswith('http'):
|
||||||
|
url = unquote(query)
|
||||||
|
|
||||||
|
fixed_urls.append(url)
|
||||||
|
|
||||||
|
return fixed_urls
|
||||||
|
|
||||||
|
def get_tenor_gif_url(tenor_url):
|
||||||
|
try:
|
||||||
|
response = requests.get(tenor_url, timeout=10)
|
||||||
|
response.raise_for_status()
|
||||||
|
soup = BeautifulSoup(response.text, 'html.parser')
|
||||||
|
|
||||||
|
gif_element = soup.select_one('div.Gif img')
|
||||||
|
if gif_element and 'src' in gif_element.attrs:
|
||||||
|
return gif_element['src']
|
||||||
|
|
||||||
|
meta_content_url = soup.select_one('meta[itemprop="contentUrl"]')
|
||||||
|
if meta_content_url and 'content' in meta_content_url.attrs:
|
||||||
|
return meta_content_url['content']
|
||||||
|
|
||||||
|
except requests.exceptions.RequestException:
|
||||||
|
pass # Silently handle the error
|
||||||
|
return None
|
||||||
|
|
||||||
|
def download_media(url):
|
||||||
|
global successful_downloads, failed_downloads
|
||||||
|
try:
|
||||||
|
if url.lower().endswith(('.gif', '.mp4', '.png')):
|
||||||
|
direct_url = url
|
||||||
|
elif 'tenor.com' in url:
|
||||||
|
gif_url = get_tenor_gif_url(url)
|
||||||
|
if gif_url:
|
||||||
|
direct_url = gif_url
|
||||||
|
else:
|
||||||
|
logger.debug(f"Skipped Tenor URL: {url}")
|
||||||
|
failed_downloads += 1
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
direct_url = url
|
||||||
|
|
||||||
|
response = requests.get(direct_url, timeout=10, allow_redirects=True)
|
||||||
|
response.raise_for_status()
|
||||||
|
|
||||||
|
content_type = response.headers.get('Content-Type', '').lower()
|
||||||
|
|
||||||
|
if 'image/gif' in content_type or direct_url.lower().endswith('.gif'):
|
||||||
|
extension = '.gif'
|
||||||
|
subfolder = 'gif'
|
||||||
|
elif 'video/mp4' in content_type or direct_url.lower().endswith('.mp4'):
|
||||||
|
extension = '.mp4'
|
||||||
|
subfolder = 'mp4'
|
||||||
|
elif 'image/png' in content_type or direct_url.lower().endswith('.png'):
|
||||||
|
extension = '.png'
|
||||||
|
subfolder = 'gif'
|
||||||
|
else:
|
||||||
|
logger.debug(f"Skipped unsupported content type: {content_type} for URL: {direct_url}")
|
||||||
|
failed_downloads += 1
|
||||||
|
return
|
||||||
|
|
||||||
|
parsed_url = urlparse(unquote(direct_url))
|
||||||
|
filename = os.path.basename(parsed_url.path)
|
||||||
|
filename, _ = os.path.splitext(filename)
|
||||||
|
|
||||||
|
if not filename or filename == extension:
|
||||||
|
path_parts = parsed_url.path.rstrip('/').split('/')
|
||||||
|
filename = path_parts[-1] if path_parts else 'unnamed'
|
||||||
|
|
||||||
|
filename = re.sub(r'[^\w\-_\. ]', '_', filename)
|
||||||
|
filename = filename.strip() or 'unnamed'
|
||||||
|
filename += extension
|
||||||
|
|
||||||
|
download_dir = os.path.join('downloaded', subfolder)
|
||||||
|
ensure_directory(download_dir)
|
||||||
|
|
||||||
|
counter = 1
|
||||||
|
original_filename = filename
|
||||||
|
while os.path.exists(os.path.join(download_dir, filename)):
|
||||||
|
name, ext = os.path.splitext(original_filename)
|
||||||
|
filename = f"{name}_{counter}{ext}"
|
||||||
|
counter += 1
|
||||||
|
|
||||||
|
full_path = os.path.join(download_dir, filename)
|
||||||
|
with open(full_path, 'wb') as file:
|
||||||
|
file.write(response.content)
|
||||||
|
successful_downloads += 1
|
||||||
|
progress = (successful_downloads + failed_downloads) / total_urls * 100
|
||||||
|
logger.info(f"Downloaded: {filename} ({progress:.1f}% complete)")
|
||||||
|
except requests.exceptions.RequestException as e:
|
||||||
|
if isinstance(e, requests.exceptions.HTTPError) and e.response.status_code == 404:
|
||||||
|
logger.debug(f"404 Not Found: {url}")
|
||||||
|
else:
|
||||||
|
logger.warning(f"Failed to download: {url}")
|
||||||
|
failed_downloads += 1
|
||||||
|
|
||||||
|
def main():
|
||||||
|
global total_urls
|
||||||
|
with open('encoded_file.txt', 'r', encoding='utf-8') as file:
|
||||||
|
content = file.read()
|
||||||
|
|
||||||
|
try:
|
||||||
|
decoded_content = base64.b64decode(content).decode('utf-8', errors='ignore')
|
||||||
|
except (base64.binascii.Error, UnicodeDecodeError):
|
||||||
|
logger.warning("Content is not valid base64 or couldn't be decoded. Using raw content.")
|
||||||
|
decoded_content = content
|
||||||
|
|
||||||
|
urls = extract_and_fix_urls(decoded_content)
|
||||||
|
total_urls = len(urls)
|
||||||
|
|
||||||
|
for url in urls:
|
||||||
|
download_media(url)
|
||||||
|
|
||||||
|
# Print statistics
|
||||||
|
logger.info("\n--- Download Statistics ---")
|
||||||
|
logger.info(f"Total URLs processed: {total_urls}")
|
||||||
|
logger.info(f"Successful downloads: {successful_downloads}")
|
||||||
|
logger.info(f"Failed downloads: {failed_downloads}")
|
||||||
|
logger.info(f"Success rate: {successful_downloads/total_urls*100:.1f}%")
|
||||||
|
|
||||||
|
# Pause for 10 seconds
|
||||||
|
logger.info("\nScript finished. Closing in 10 seconds...")
|
||||||
|
time.sleep(10)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
requests
|
||||||
|
bs4
|
||||||
|
logging
|
Loading…
Reference in a new issue