diff --git a/main.py b/main.py
new file mode 100644
index 0000000..f12d5bc
--- /dev/null
+++ b/main.py
@@ -0,0 +1,187 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Author: nloginov
+# Script Name: Discord Favorite Gif Downloader
+
+import base64
+import binascii
+import logging
+import os
+import re
+import time
+from urllib.parse import urlparse, unquote
+
+import requests
+from bs4 import BeautifulSoup
+
+# Set up logging
+logging.basicConfig(level=logging.INFO, format='%(message)s')
+logger = logging.getLogger()
+
+# Global counters
+total_urls = 0
+successful_downloads = 0
+failed_downloads = 0
+
+def ensure_directory(directory):
+    os.makedirs(directory, exist_ok=True)
+
+def extract_and_fix_urls(text):
+    pattern = r'https?:?/+[a-zA-Z0-9\-._~:/?#[\]@!$&\'()*+,;=%]+'
+    urls = re.findall(pattern, text)
+
+    fixed_urls = []
+    for url in urls:
+        # Repair schemes that lost their colon, e.g. "https/example.com"
+        if url.startswith('http/'):
+            url = 'http://' + url[5:]
+        elif url.startswith('https/'):
+            url = 'https://' + url[6:]
+
+        # Collapse malformed separators such as "http:///" back to "http://"
+        url = re.sub(r'^(https?:)/+', r'\1//', url)
+
+        # Discord's media proxy embeds the original URL at the end of the
+        # path; unwrap it so the file is fetched from its source
+        if 'discordapp.net/external/' in url:
+            parsed = urlparse(url)
+            query = parsed.path.split('/')[-1]
+            if query.startswith('http'):
+                url = unquote(query)
+
+        fixed_urls.append(url)
+
+    return fixed_urls
+
+def get_tenor_gif_url(tenor_url):
+    # Resolve a tenor.com page URL to its direct media URL by scraping the page
+    try:
+        response = requests.get(tenor_url, timeout=10)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, 'html.parser')
+
+        gif_element = soup.select_one('div.Gif img')
+        if gif_element and 'src' in gif_element.attrs:
+            return gif_element['src']
+
+        meta_content_url = soup.select_one('meta[itemprop="contentUrl"]')
+        if meta_content_url and 'content' in meta_content_url.attrs:
+            return meta_content_url['content']
+
+    except requests.exceptions.RequestException:
+        pass  # The caller treats None as a failed lookup
+    return None
+
+def download_media(url):
+    global successful_downloads, failed_downloads
+    try:
+        if url.lower().endswith(('.gif', '.mp4', '.png')):
+            direct_url = url
+        elif 'tenor.com' in url:
+            gif_url = get_tenor_gif_url(url)
+            if gif_url:
+                direct_url = gif_url
+            else:
+                logger.debug(f"Skipped Tenor URL: {url}")
+                failed_downloads += 1
+                return
+        else:
+            direct_url = url
+
+        response = requests.get(direct_url, timeout=10, allow_redirects=True)
+        response.raise_for_status()
+
+        content_type = response.headers.get('Content-Type', '').lower()
+
+        if 'image/gif' in content_type or direct_url.lower().endswith('.gif'):
+            extension = '.gif'
+            subfolder = 'gif'
+        elif 'video/mp4' in content_type or direct_url.lower().endswith('.mp4'):
+            extension = '.mp4'
+            subfolder = 'mp4'
+        elif 'image/png' in content_type or direct_url.lower().endswith('.png'):
+            extension = '.png'
+            subfolder = 'gif'  # PNG stills are stored alongside GIFs
+        else:
+            logger.debug(f"Skipped unsupported content type: {content_type} for URL: {direct_url}")
+            failed_downloads += 1
+            return
+
+        # Derive a filesystem-safe filename from the URL path
+        parsed_url = urlparse(unquote(direct_url))
+        filename = os.path.basename(parsed_url.path)
+        filename, _ = os.path.splitext(filename)
+
+        if not filename or filename == extension:
+            path_parts = parsed_url.path.rstrip('/').split('/')
+            filename = path_parts[-1] if path_parts else 'unnamed'
+
+        filename = re.sub(r'[^\w\-_\. ]', '_', filename)
+        filename = filename.strip() or 'unnamed'
+        filename += extension
+
+        download_dir = os.path.join('downloaded', subfolder)
+        ensure_directory(download_dir)
+
+        # Append a counter instead of overwriting an existing file
+        counter = 1
+        original_filename = filename
+        while os.path.exists(os.path.join(download_dir, filename)):
+            name, ext = os.path.splitext(original_filename)
+            filename = f"{name}_{counter}{ext}"
+            counter += 1
+
+        full_path = os.path.join(download_dir, filename)
+        with open(full_path, 'wb') as file:
+            file.write(response.content)
+        successful_downloads += 1
+        progress = (successful_downloads + failed_downloads) / total_urls * 100
+        logger.info(f"Downloaded: {filename} ({progress:.1f}% complete)")
+    except requests.exceptions.RequestException as e:
+        if isinstance(e, requests.exceptions.HTTPError) and e.response.status_code == 404:
+            logger.debug(f"404 Not Found: {url}")
+        else:
+            logger.warning(f"Failed to download: {url}")
+        failed_downloads += 1
+
+def main():
+    global total_urls
+    with open('encoded_file.txt', 'r', encoding='utf-8') as file:
+        content = file.read()
+
+    try:
+        decoded_content = base64.b64decode(content).decode('utf-8', errors='ignore')
+    except (binascii.Error, UnicodeDecodeError):
+        logger.warning("Content is not valid base64 or couldn't be decoded. Using raw content.")
+        decoded_content = content
+
+    urls = extract_and_fix_urls(decoded_content)
+    total_urls = len(urls)
+
+    for url in urls:
+        download_media(url)
+
+    # Print statistics
+    logger.info("\n--- Download Statistics ---")
+    logger.info(f"Total URLs processed: {total_urls}")
+    logger.info(f"Successful downloads: {successful_downloads}")
+    logger.info(f"Failed downloads: {failed_downloads}")
+    if total_urls:  # Avoid division by zero when no URLs were found
+        logger.info(f"Success rate: {successful_downloads/total_urls*100:.1f}%")
+
+    # Pause so the console window stays open long enough to read the stats
+    logger.info("\nScript finished. Closing in 10 seconds...")
+    time.sleep(10)
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..adbe886
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+requests
+beautifulsoup4