diff --git a/FakeDetector.py b/FakeDetector.py new file mode 100644 index 0000000..daa4827 --- /dev/null +++ b/FakeDetector.py @@ -0,0 +1,463 @@ +#!/usr/bin/env python +# +# Fake detection script for NZBGet +# +# Copyright (C) 2014-2016 Andrey Prygunkov +# Copyright (C) 2014 Clinton Hall +# Copyright (C) 2014 JVM +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# + +############################################################################## +### NZBGET QUEUE/POST-PROCESSING SCRIPT ### +### QUEUE EVENTS: NZB_ADDED, NZB_DOWNLOADED, FILE_DOWNLOADED + +# Detect nzbs with fake media files. +# +# If a fake is detected the download is marked as bad. NZBGet removes +# the download from queue and (if option "DeleteCleanupDisk" is active) the +# downloaded files are deleted from disk. If duplicate handling is active +# (option "DupeCheck") then another duplicate is chosen for download +# if available. +# +# The status "FAILURE/BAD" is passed to other scripts and informs them +# about failure. +# +# PP-Script version: 2.0. +# +# For more info and updates please visit forum topic at +# http://nzbget.net/forum/viewtopic.php?f=8&t=1394. +# +# NOTE: This script requires Python to be installed on your system (tested +# only with Python 3.x). 


##############################################################################
### OPTIONS                                                                ###

# Banned extensions.
#
# Downloads which contain files with any of the following extensions will be marked as fake.
# Extensions must be separated by a comma (eg: .wmv, .divx).
#BannedExtensions=


### NZBGET QUEUE/POST-PROCESSING SCRIPT                                    ###
##############################################################################


import os
import sys
import subprocess
import re
import urllib.request, urllib.error, urllib.parse
from xmlrpc.client import ServerProxy
from base64 import b64encode
import shlex
import traceback

# Exit codes used by NZBGet for post-processing scripts.
# Queue-scripts don't have any special exit codes.
POSTPROCESS_SUCCESS = 93
POSTPROCESS_NONE = 95
POSTPROCESS_ERROR = 94

# File extensions (lower case, with leading dot) treated as media files.
mediaExtensions = ['.mkv', '.avi', '.divx', '.xvid', '.mov', '.wmv', '.mp4', '.mpg', '.mpeg', '.vob', '.iso', '.m4v']


def parse_extension_list(raw):
    """Split a comma separated option value into a list of extensions.

    Spaces are removed, entries are lower-cased and empty entries (an unset
    option, an empty option or a stray comma) are dropped, so the result
    never contains '' (which would match every file without an extension).

    raw -- option value as a string, or None when the option is not set.
    Returns a (possibly empty) list of strings.
    """
    if not raw:
        return []
    cleaned = (part.replace(' ', '').lower() for part in raw.split(','))
    return [ext for ext in cleaned if ext]


# Extensions configured via script option "BannedExtensions".
# The option may be missing from the environment (e.g. when the script is
# started by hand), which must not crash before start_check() can report it.
bannedMediaExtensions = parse_extension_list(os.environ.get('NZBPO_BANNEDEXTENSIONS'))

verbose = False


# Start up checks
def start_check():
    """Validate the calling context and exit early when there is nothing to do.

    Terminates the process via sys.exit() in these cases:
      - exit code 1: required NZBGet environment variables are missing;
      - exit code 0: the queue event is not one of the handled event types;
      - POSTPROCESS_SUCCESS: the nzb already has status FAILURE/BAD;
      - POSTPROCESS_NONE: the destination directory does not exist or the
        total status is already FAILURE.
    For the last three post-processing cases clean_up() is called first.
    Returns None when processing should continue.
    """
    # Check if the script is called from a compatible NZBGet version (as queue-script or as pp-script)
    called_by_nzbget = 'NZBNA_EVENT' in os.environ or 'NZBPP_DIRECTORY' in os.environ
    if not called_by_nzbget or 'NZBOP_ARTICLECACHE' not in os.environ:
        print('*** NZBGet queue script ***')
        print('This script is supposed to be called from nzbget (14.0 or later).')
        sys.exit(1)

    # This script processes only certain queue events.
    # For compatibility with newer NZBGet versions it ignores event types it doesn't know
    if os.environ.get('NZBNA_EVENT') not in ['NZB_ADDED', 'FILE_DOWNLOADED', 'NZB_DOWNLOADED', None]:
        sys.exit(0)

    # If nzb was already marked as bad don't do any further detection
    if os.environ.get('NZBPP_STATUS') == 'FAILURE/BAD':
        if os.environ.get('NZBPR_PPSTATUS_FAKE') == 'yes':
            # Print the message again during post-processing to add it into the post-processing log
            # (which can then be used by notification scripts such as EMail.py)
            # Pp-parameter "NZBPR_PPSTATUS_FAKEBAN" contains more details (saved previously by our script)
            banned_ext = os.environ.get('NZBPR_PPSTATUS_FAKEBAN')
            if banned_ext is None:
                print('[WARNING] Download has media files and executables')
            else:
                print('[WARNING] Download contains banned extension ' + banned_ext)
        clean_up()
        sys.exit(POSTPROCESS_SUCCESS)

    # If called via "Post-process again" from history details dialog the download may not exist anymore
    if 'NZBPP_DIRECTORY' in os.environ and not os.path.exists(os.environ.get('NZBPP_DIRECTORY')):
        print('Destination directory doesn\'t exist, exiting')
        clean_up()
        sys.exit(POSTPROCESS_NONE)

    # If nzb is already failed, don't do any further detection
    if os.environ.get('NZBPP_TOTALSTATUS') == 'FAILURE':
        clean_up()
        sys.exit(POSTPROCESS_NONE)


# Check if media files present in the list of files
def contains_media(list):
    """Return True if any name in `list` has a media extension.

    The extension is compared case-insensitively against mediaExtensions.
    (The parameter keeps its historical name although it shadows the builtin.)
    """
    return any(os.path.splitext(item)[1].lower() in mediaExtensions for item in list)


# Check if banned media files present in the list of files
def contains_banned_media(list):
    """Return the first banned extension found in `list`, or '' if none.

    Extensions are compared case-insensitively against bannedMediaExtensions
    and the lower-cased extension is returned. Names without an extension
    never match. A message is printed for the matching file.
    """
    for item in list:
        ext = os.path.splitext(item)[1].lower()
        # An empty extension must never count as banned: '' is also the
        # "nothing found" return value.
        if ext and ext in bannedMediaExtensions:
            print('[INFO] Found file with banned extension: ' + item)
            return ext
    return ''


# Check if executable files present in the list of files
# Exception: rename.bat (.sh, .exe) are ignored, sometimes valid posts
# include them.
def contains_executable(list):
    """Return True if `list` contains a file that looks like an executable.

    Rules, as implemented:
      - names matching "reverse" or "spiegelen" (case-insensitive, anywhere
        in the path) are skipped entirely;
      - any ".exe" file counts as executable, including "rename.exe";
      - ".bat" and ".sh" files count unless their base name is "rename".
    Extensions and the "rename" name are compared case-insensitively.
    A message is printed for the first executable found.
    (The parameter keeps its historical name although it shadows the builtin.)
    """
    executable_exts = ('.exe', '.bat', '.sh')
    allowed_names = ('rename',)
    excluded_paths = (r'reverse', r'spiegelen')
    for item in list:
        if any(re.search(pattern, item, re.I) for pattern in excluded_paths):
            continue
        stem, ext = os.path.splitext(item)
        ext = ext.lower()
        # Compare against the bare file name, without any directory part
        name = os.path.basename(stem) or stem
        if ext == '.exe' or (ext in executable_exts and name.lower() not in allowed_names):
            print('[INFO] Found executable %s' % item)
            return True
    return False


# Finds untested files, comparing all files and processed files in tmp_file
def get_latest_file(dir):
    """Return the names in `dir` which are not yet recorded as tested.

    The list of tested names is read from the file named by the global
    tmp_file_name (one name per line). If that file cannot be read, its
    folder and an empty file are created and every name in `dir` is returned.
    Errors from listing `dir` itself are propagated to the caller and leave
    the list of tested names untouched.
    """
    try:
        with open(tmp_file_name) as tmp_file:
            tested = tmp_file.read().splitlines()
    except OSError:
        # tmp_file doesn't exist, all files need testing
        temp_folder = os.path.dirname(tmp_file_name)
        if not os.path.exists(temp_folder):
            os.makedirs(temp_folder)
            print('[DETAIL] Created folder ' + temp_folder)
        with open(tmp_file_name, "w") as tmp_file:
            tmp_file.write('')
        print('[DETAIL] Created temp file ' + tmp_file_name)
        return os.listdir(dir)
    files = os.listdir(dir)
    return [name for name in set(files) - set(tested)]


# Saves tested files so to not test again
def save_tested(data):
    """Append `data` (newline-terminated file names) to the file tmp_file_name."""
    with open(tmp_file_name, "a") as tmp_file:
        tmp_file.write(data)


# Extract path to unrar from NZBGet's global option "UnrarCmd";
# Since v15 "UnrarCmd" may contain extra parameters passed to unrar;
# We have to strip these parameters because we need only the path to unrar.
# Returns path to unrar executable.
def unrar():
    """Return the path of the unrar executable taken from option "UnrarCmd".

    Reads environment variable NZBOP_UNRARCMD. If the whole value is an
    existing file ending with the executable name it is returned as is;
    otherwise the value is split like a shell command line and the first
    argument ending with the executable name is returned. When nothing
    matches (or the option is missing or cannot be split) the bare executable
    name is returned, hoping it is in the search path.
    """
    exe_name = 'unrar.exe' if os.name == 'nt' else 'unrar'
    unrar_cmd = os.environ.get('NZBOP_UNRARCMD', '')
    if os.path.isfile(unrar_cmd) and unrar_cmd.lower().endswith(exe_name):
        return unrar_cmd
    try:
        args = shlex.split(unrar_cmd)
    except ValueError:
        # Unbalanced quotes in the option value
        args = []
    for arg in args:
        if arg.lower().endswith(exe_name):
            return arg
    # We were unable to determine the path to unrar;
    # Let's use the exe name with a hope it's in the search path
    return exe_name


# List contents of rar-files (without unpacking).
# That's how we detect fakes during download, when the download is not completed yet.
def list_all_rars(dir):
    """Return the lines printed by "unrar vb" for the untested files in `dir`.

    Every file returned by get_latest_file(dir) whose name does not contain
    "tmp" is passed to unrar and then recorded as tested via save_tested(),
    whether or not unrar could be run. Failures are printed, not raised.
    """
    files = get_latest_file(dir)
    unrar_path = unrar()
    tested = []
    listings = []
    for file in files:
        # avoid .tmp files as corrupt
        if "tmp" in file:
            continue
        try:
            command = [unrar_path, "vb", os.path.join(dir, file)]
            if verbose:
                print('command: %s' % command)
            proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out_tmp, err = proc.communicate()
            # Archive member names may not be valid UTF-8; a strict decode
            # would discard the whole listing of this archive.
            listings.append(out_tmp.decode(errors='replace'))
            if verbose:
                print(out_tmp)
        except Exception as e:
            print('[ERROR] Failed %s: %s' % (file, e))
            if verbose:
                traceback.print_exc()
        tested.append(file + '\n')
    save_tested(''.join(tested))
    return ''.join(listings).splitlines()


# Detect fake nzbs. Returns True if a fake is detected.
def detect_fake(name, dir):
    """Return True if the download in directory `dir` looks like a fake.

    `name` is accepted for interface compatibility and is not used.
    Prints "[NZB] NZBPR_PPSTATUS_FAKEBAN=..." lines to record (or reset)
    the banned extension which triggered the detection.
    """
    # Fake detection:
    # If download contains media files AND executables we consider it a fake.
    # QUEUE mode (called during download and before unpack):
    #  - if directory contains archives list their content and use the file
    #    names for detection;
    # POST-PROCESSING mode (called after unpack):
    #  - scan directory content and use file names for detection;
    #  - TODO: check video files using ffprobe.
    #
    # It's actually not necessary to check the mode (QUEUE or POST-PROCESSING), we always do all checks.

    dir = os.path.normpath(dir)
    filelist = []
    dirlist = []
    for entry in os.listdir(dir):
        path = os.path.join(dir, entry)
        if os.path.isfile(path):
            filelist.append(entry)
        elif os.path.isdir(path):
            dirlist.append(path)
    filelist.extend(list_all_rars(dir))
    for subdir in dirlist:
        filelist.extend(list_all_rars(subdir))
    if contains_media(filelist) and contains_executable(filelist):
        print('[WARNING] Download has media files and executables')
        # Remove info about banned extension from pp-parameter "NZBPR_PPSTATUS_FAKEBAN"
        # (in a case it was saved previously)
        print('[NZB] NZBPR_PPSTATUS_FAKEBAN=')
        return True
    banned_ext = contains_banned_media(filelist)
    if banned_ext != '':
        print('[WARNING] Download contains banned extension ' + banned_ext)
        # Save details about banned extension in pp-parameter "NZBPR_PPSTATUS_FAKEBAN"
        print('[NZB] NZBPR_PPSTATUS_FAKEBAN=' + banned_ext)
        return True
    return False


# Read connection info of NZBGet server.
def _nzbget_connection_info():
    """Return (host, port, username, password) for the NZBGet control port.

    NZBGet passes all configuration options to scripts as environment
    variables. Address 0.0.0.0 is replaced with 127.0.0.1.
    Raises KeyError if one of the options is missing.
    """
    host = os.environ['NZBOP_CONTROLIP']
    if host == '0.0.0.0':
        host = '127.0.0.1'
    port = os.environ['NZBOP_CONTROLPORT']
    username = os.environ['NZBOP_CONTROLUSERNAME']
    password = os.environ['NZBOP_CONTROLPASSWORD']
    return host, port, username, password


# Establish connection to NZBGet via RPC-API
def connect_to_nzbget():
    """Return an xmlrpc ServerProxy for the NZBGet server."""
    host, port, username, password = _nzbget_connection_info()

    # Build an URL for XML-RPC requests.
    # Username and password are percent-encoded, otherwise characters such
    # as '@', ':' or '/' in them would break the URL.
    xmlRpcUrl = 'http://%s:%s@%s:%s/xmlrpc' % (
        urllib.parse.quote(username, safe=''),
        urllib.parse.quote(password, safe=''),
        host, port)

    # Create remote server object
    return ServerProxy(xmlRpcUrl)


# Connect to NZBGet and call an RPC-API-method without using of python's XML-RPC.
# XML-RPC is easy to use but it is slow for large amount of data
def call_nzbget_direct(url_command):
    """Call JSON-RPC method `url_command` and return the raw response text.

    `url_command` is the method name with its URL-encoded parameters,
    e.g. 'listgroups?1=0'. The response is decoded as UTF-8 and returned
    unparsed. Errors raised by urllib are propagated.
    """
    host, port, username, password = _nzbget_connection_info()

    # Building http-URL to call the method
    httpUrl = 'http://%s:%s/jsonrpc/%s' % (host, port, url_command)
    request = urllib.request.Request(httpUrl)

    authString = '%s:%s' % (username, password)
    base64string = b64encode(authString.encode()).decode("ascii")
    request.add_header("Authorization", "Basic %s" % base64string)

    # Load data from NZBGet; the response is closed when the block is left
    with urllib.request.urlopen(request) as response:
        data = response.read().decode('utf-8')

    # "data" is a JSON raw-string
    return data


# The last rar-file is the one with the highest XX in ".partXX.rar" or ".rXX"
_PART_RAR_RE = re.compile(r'.*\.part(\d+)\.rar', re.IGNORECASE)
_OLD_RAR_RE = re.compile(r'.*\.r(\d+)', re.IGNORECASE)


# Find the last rar-file in the raw output of RPC-method "listfiles"
def _find_last_rar(data):
    """Return (file_id, file_name) of the rar-file with the highest number.

    `data` is the raw JSON text; it is scanned line by line for lines starting
    with '"ID" : ' and '"Filename" : "' instead of being parsed with the
    json-module (which is slow). Returns (None, None) if no rar-file is found.
    """
    file_num = None
    file_id = None
    file_name = None
    cur_id = None

    for line in data.splitlines():
        if line.startswith('"ID" : '):
            # Strip the prefix and the trailing comma
            cur_id = int(line[7:len(line) - 1])
        if line.startswith('"Filename" : "'):
            # Strip the prefix and the trailing '",'
            cur_name = line[14:len(line) - 2]
            match = _PART_RAR_RE.match(cur_name) or _OLD_RAR_RE.match(cur_name)
            if match:
                cur_num = int(match.group(1))
                if file_num is None or cur_num > file_num:
                    file_num = cur_num
                    file_id = cur_id
                    file_name = cur_name
    return file_id, file_name


# Reorder inner files for earlier fake detection
def sort_inner_files():
    """Move the last rar-file of the current nzb to the top of its file list.

    The nzb is identified by environment variable NZBNA_NZBID.
    """
    nzb_id = int(os.environ.get('NZBNA_NZBID'))

    # Building command-URL to call method "listfiles" passing three parameters: (0, 0, nzb_id)
    url_command = 'listfiles?1=0&2=0&3=%i' % nzb_id
    data = call_nzbget_direct(url_command)

    file_id, file_name = _find_last_rar(data)

    # Move the last rar-file to the top of file list
    if file_id is not None:
        print('[INFO] Moving last rar-file to the top: %s' % file_name)
        # Create remote server object
        nzbget = connect_to_nzbget()
        # Using RPC-method "editqueue" of XML-RPC-object "nzbget".
        # we could use direct http access here too but the speed isn't
        # an issue here and XML-RPC is easier to use.
        nzbget.editqueue('FileMoveTop', 0, '', [file_id])
    else:
        print('[INFO] Skipping sorting since could not find any rar-files')


# Remove current and any old temp files
def clean_up():
    """Delete the temp file of the current nzb and temp files of nzbs that
    are no longer in the download queue.

    Temp files live in "<NZBOP_TEMPDIR>/FakeDetector" and are named after
    the nzb id. Does nothing if that folder cannot be listed (e.g. it was
    never created). The queue is only queried when temp files other than
    our own exist.
    """
    nzb_id = os.environ.get('NZBPP_NZBID')
    temp_folder = os.path.join(os.environ.get('NZBOP_TEMPDIR'), 'FakeDetector')

    try:
        files = os.listdir(temp_folder)
    except OSError:
        # No temp folder: nothing was ever recorded, nothing to remove
        return

    nzbids = set()
    if any(name != nzb_id for name in files):
        # Create the list of nzbs in download queue
        data = call_nzbget_direct('listgroups?1=0')
        # The "data" is a raw json-string. We could use json.loads(data) to
        # parse it but json-module is slow. We parse it on our own.
        for line in data.splitlines():
            if line.startswith('"NZBID" : '):
                nzbids.add(str(int(line[10:len(line) - 1])))

    old_temp_files = set(files) - nzbids
    if nzb_id in files:
        old_temp_files.add(nzb_id)

    for temp_id in old_temp_files:
        temp_file = os.path.join(temp_folder, str(temp_id))
        try:
            print('[DETAIL] Removing temp file ' + temp_file)
            os.remove(temp_file)
        except OSError:
            print('[ERROR] Could not remove temp file ' + temp_file)


# Script body
def main():
    """Run the fake detection for the nzb described by the environment.

    Sets the global tmp_file_name. May terminate the process via sys.exit()
    (in start_check() or after handling event NZB_ADDED).
    """
    # Globally define directory for storing list of tested files
    global tmp_file_name

    # Do start up check
    start_check()

    event = os.environ.get('NZBNA_EVENT')

    # Depending on the mode in which the script was called (queue-script
    # or post-processing-script) a different set of parameters (env. vars)
    # is passed. They also have different prefixes:
    #  - NZBNA_ in queue-script mode;
    #  - NZBPP_ in pp-script mode.
    prefix = 'NZBNA_' if 'NZBNA_EVENT' in os.environ else 'NZBPP_'

    # Read context (what nzb is currently being processed)
    directory = os.environ[prefix + 'DIRECTORY']
    nzb_name = os.environ[prefix + 'NZBNAME']

    # File for storing list of tested files
    tmp_file_name = os.path.join(os.environ.get('NZBOP_TEMPDIR'), 'FakeDetector',
                                 os.environ.get(prefix + 'NZBID'))

    # When nzb is added to queue - reorder inner files for earlier fake detection.
    # Also it is possible that nzb was added with a category which doesn't have
    # FakeDetector listed in the PostScript. In this case FakeDetector was not called
    # when adding nzb to queue but it is being called now and we can reorder
    # files now.
    not_sorted_yet = os.environ.get('NZBPR_FAKEDETECTOR_SORTED') != 'yes'
    if event == 'NZB_ADDED' or (event == 'FILE_DOWNLOADED' and not_sorted_yet):
        print('[INFO] Sorting inner files for earlier fake detection for %s' % nzb_name)
        sys.stdout.flush()
        sort_inner_files()
        print('[NZB] NZBPR_FAKEDETECTOR_SORTED=yes')
        if event == 'NZB_ADDED':
            sys.exit(POSTPROCESS_NONE)

    print('[DETAIL] Detecting fake for %s' % nzb_name)
    sys.stdout.flush()

    if detect_fake(nzb_name, directory):
        # A fake is detected
        #
        # Add post-processing parameter "PPSTATUS_FAKE" for nzb-file.
        # Scripts running after fake detector can check the parameter like this:
        # if os.environ.get('NZBPR_PPSTATUS_FAKE') == 'yes':
        #     print('Marked as fake by another script')
        print('[NZB] NZBPR_PPSTATUS_FAKE=yes')

        # Special command telling NZBGet to mark nzb as bad. The nzb will
        # be removed from queue and become status "FAILURE/BAD".
        print('[NZB] MARK=BAD')
    else:
        # Not a fake or at least doesn't look like a fake (yet).
        #
        # When nzb is downloaded again (using "Download again" from history)
        # it may have been marked by our script as a fake. Since now the script
        # doesn't consider nzb as fake we remove the old marking. That's
        # of course a rare case that someone will redownload a fake but
        # at least during debugging of fake detector we do that all the time.
        if os.environ.get('NZBPR_PPSTATUS_FAKE') == 'yes':
            print('[NZB] NZBPR_PPSTATUS_FAKE=')

    print('[DETAIL] Detecting completed for %s' % nzb_name)
    sys.stdout.flush()

    # Remove temp files in PP
    if prefix == 'NZBPP_':
        clean_up()


# Execute main script function only when run as a script, so that the module
# can be imported without side effects.
if __name__ == '__main__':
    main()

    # All OK, returning exit status 'POSTPROCESS_SUCCESS' (int <93>) to let NZBGet know
    # that our script has successfully completed (only for pp-script mode).
    sys.exit(POSTPROCESS_SUCCESS)