#!/usr/bin/env python
#
# Fake detection script for NZBGet
#
# Copyright (C) 2014-2016 Andrey Prygunkov
# Copyright (C) 2014 Clinton Hall
# Copyright (C) 2014 JVM
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#

##############################################################################
### NZBGET QUEUE/POST-PROCESSING SCRIPT ###
### QUEUE EVENTS: NZB_ADDED, NZB_DOWNLOADED, FILE_DOWNLOADED

# Detect nzbs with fake media files.
#
# If a fake is detected the download is marked as bad. NZBGet removes
# the download from queue and (if option "DeleteCleanupDisk" is active) the
# downloaded files are deleted from disk. If duplicate handling is active
# (option "DupeCheck") then another duplicate is chosen for download
# if available.
#
# The status "FAILURE/BAD" is passed to other scripts and informs them
# about failure.
#
# PP-Script version: 2.0.
#
# For more info and updates please visit forum topic at
# http://nzbget.net/forum/viewtopic.php?f=8&t=1394.
#
# NOTE: This script requires Python to be installed on your system (tested
# only with Python 3.x).

##############################################################################
### OPTIONS ###

# Banned extensions.
#
# Downloads which contain files with any of the following extensions will be marked as fake.
# Extensions must be separated by a comma (eg: .wmv, .divx).
#BannedExtensions=

### NZBGET QUEUE/POST-PROCESSING SCRIPT ###
##############################################################################

import os
import sys
import subprocess
import re
import urllib.request, urllib.error, urllib.parse
from xmlrpc.client import ServerProxy
from base64 import b64encode
import shlex
import traceback

# Exit codes used by NZBGet for post-processing scripts.
# Queue-scripts don't have any special exit codes.
POSTPROCESS_SUCCESS=93
POSTPROCESS_NONE=95
POSTPROCESS_ERROR=94

mediaExtensions = ['.mkv', '.avi', '.divx', '.xvid', '.mov', '.wmv', '.mp4', '.mpg', '.mpeg', '.vob', '.iso', '.m4v']

# Option "BannedExtensions" may be unset or empty; treat both as "nothing banned".
# Empty entries (from an empty option or a stray comma) are dropped because an
# empty string would otherwise match every file that has no extension.
bannedMediaExtensions = [
    ext
    for ext in os.environ.get('NZBPO_BANNEDEXTENSIONS', '').replace(' ', '').split(',')
    if ext
]

verbose = False


# Start up checks
def start_check():
    """Validate the calling environment and exit early when nothing is to be done.

    Exits the process (via ``sys.exit``) when:
    - the script is not called by NZBGet (exit code 1);
    - the queue event is one this script does not handle (exit code 0);
    - the nzb is already marked bad (``POSTPROCESS_SUCCESS``);
    - the destination directory is gone or the nzb already failed
      (``POSTPROCESS_NONE``).
    Returns normally only when fake detection should proceed.
    """
    # Check if the script is called from a compatible NZBGet version (as queue-script or as pp-script)
    called_by_nzbget = 'NZBNA_EVENT' in os.environ or 'NZBPP_DIRECTORY' in os.environ
    if not called_by_nzbget or 'NZBOP_ARTICLECACHE' not in os.environ:
        print('*** NZBGet queue script ***')
        print('This script is supposed to be called from nzbget (14.0 or later).')
        sys.exit(1)

    # This script processes only certain queue events.
    # For compatibility with newer NZBGet versions it ignores event types it doesn't know
    if os.environ.get('NZBNA_EVENT') not in ['NZB_ADDED', 'FILE_DOWNLOADED', 'NZB_DOWNLOADED', None]:
        sys.exit(0)

    # If nzb was already marked as bad don't do any further detection
    if os.environ.get('NZBPP_STATUS') == 'FAILURE/BAD':
        if os.environ.get('NZBPR_PPSTATUS_FAKE') == 'yes':
            # Print the message again during post-processing to add it into the post-processing log
            # (which then can be used by notification scripts such as EMail.py)
            # Pp-parameter "NZBPR_PPSTATUS_FAKEBAN" contains more details (saved previously by our script)
            banned_ext = os.environ.get('NZBPR_PPSTATUS_FAKEBAN')
            # The parameter is cleared by printing "NZBPR_PPSTATUS_FAKEBAN=" (empty value),
            # so an empty string must be treated the same as a missing parameter.
            if not banned_ext:
                print('[WARNING] Download has media files and executables')
            else:
                print('[WARNING] Download contains banned extension ' + banned_ext)
        clean_up()
        sys.exit(POSTPROCESS_SUCCESS)

    # If called via "Post-process again" from history details dialog the download may not exist anymore
    if 'NZBPP_DIRECTORY' in os.environ and not os.path.exists(os.environ.get('NZBPP_DIRECTORY')):
        print('Destination directory doesn\'t exist, exiting')
        clean_up()
        sys.exit(POSTPROCESS_NONE)

    # If nzb is already failed, don't do any further detection
    if os.environ.get('NZBPP_TOTALSTATUS') == 'FAILURE':
        clean_up()
        sys.exit(POSTPROCESS_NONE)


# Check if media files present in the list of files
def contains_media(list):
    """Return True if any name in *list* has an extension from ``mediaExtensions``."""
    return any(os.path.splitext(item)[1] in mediaExtensions for item in list)


# Check if banned media files present in the list of files
def contains_banned_media(list):
    """Return the first banned extension found in *list*, or '' if there is none.

    Files without an extension never match, even if the banned list contains
    an empty entry.
    """
    for item in list:
        ext = os.path.splitext(item)[1]
        if ext and ext in bannedMediaExtensions:
            print('[INFO] Found file with banned extension: ' + item)
            return ext
    return ''


# Check if executable files present in the list of files
# Exception: rename.bat (.sh) is ignored, sometimes valid posts include them;
# .exe files are always treated as executables.
def contains_executable(list):
    """Return True if *list* contains a file that looks like an executable.

    A file counts as executable when its extension is '.exe', or when its
    extension is '.bat' or '.sh' and its base name is not an allowed helper
    name ('rename' / 'Rename'). Entries whose path matches one of the
    excluded patterns (case-insensitive) are skipped entirely.
    """
    exExtensions = ['.exe', '.bat', '.sh']
    allowNames = ['rename', 'Rename']
    excludePath = [r'reverse', r'spiegelen']
    for item in list:
        if any(re.search(pattern, item, re.I) for pattern in excludePath):
            continue
        name, ext = os.path.splitext(item)
        # Compare only the file name, not the directory part of the entry
        base_name = os.path.split(name)[1]
        if base_name != "":
            name = base_name
        if ext == '.exe' or (ext in exExtensions and name not in allowNames):
            print('[INFO] Found executable %s' % item)
            return True
    return False


# Finds untested files, comparing all files and processed files in tmp_file
def get_latest_file(dir):
    """Return the entries of *dir* that are not yet recorded in the tested-files list.

    The list is read from the file named by the global ``tmp_file_name``.
    If that file cannot be read it is (re)created empty and every entry of
    *dir* is returned. Errors from listing *dir* itself propagate to the
    caller and never reset the tested-files list.
    """
    try:
        with open(tmp_file_name) as tmp_file:
            tested = tmp_file.read().splitlines()
    except (OSError, UnicodeError):
        # tmp_file doesn't exist (or is unreadable), all files need testing
        temp_folder = os.path.dirname(tmp_file_name)
        if not os.path.exists(temp_folder):
            os.makedirs(temp_folder)
            print('[DETAIL] Created folder ' + temp_folder)
        with open(tmp_file_name, "w") as tmp_file:
            tmp_file.write('')
        print('[DETAIL] Created temp file ' + tmp_file_name)
        return os.listdir(dir)
    files = os.listdir(dir)
    return [name for name in set(files) - set(tested)]


# Saves tested files so to not test again
def save_tested(data):
    """Append *data* (newline-terminated file names) to the tested-files list."""
    with open(tmp_file_name, "a") as tmp_file:
        tmp_file.write(data)


# Extract path to unrar from NZBGet's global option "UnrarCmd";
# Since v15 "UnrarCmd" may contain extra parameters passed to unrar;
# We have to strip these parameters because we need only the path to unrar.
# Returns path to unrar executable.
def unrar():
    """Return the path to the unrar executable taken from option "UnrarCmd".

    Falls back to the bare executable name ('unrar', or 'unrar.exe' on
    Windows) when the option is missing, cannot be parsed, or contains no
    argument ending with that name.
    """
    on_windows = os.name == 'nt'
    exe_name = 'unrar.exe' if on_windows else 'unrar'
    UnrarCmd = os.environ.get('NZBOP_UNRARCMD', '')
    if os.path.isfile(UnrarCmd) and UnrarCmd.lower().endswith(exe_name):
        return UnrarCmd
    try:
        # POSIX-mode splitting treats backslashes as escapes and would mangle
        # unquoted Windows paths, so use non-POSIX mode there.
        args = shlex.split(UnrarCmd, posix=not on_windows)
    except ValueError:
        # Unbalanced quotes in the option: nothing usable can be extracted
        args = []
    for arg in args:
        if on_windows:
            # Non-POSIX mode keeps the surrounding quotes in the token
            arg = arg.strip('"')
        if arg.lower().endswith(exe_name):
            return arg
    # We were unable to determine the path to unrar;
    # Let's use the exe name with a hope it's in the search path
    return exe_name


# List contents of rar-files (without unpacking).
# That's how we detect fakes during download, when the download is not completed yet.
def list_all_rars(dir):
    """Return the file names listed by ``unrar vb`` for each untested file in *dir*.

    Files with "tmp" in their name are skipped and not recorded as tested.
    A failure on one file is logged and does not stop the remaining files.
    Every examined file is recorded via ``save_tested`` so it is not listed
    again on the next call.
    """
    files = get_latest_file(dir)
    tested = []
    listings = []
    for file in files:
        # avoid .tmp files as corrupt
        if "tmp" in file:
            continue
        try:
            command = [unrar(), "vb", os.path.join(dir, file)]
            if verbose:
                print('command: %s' % command)
            proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out_tmp, _err = proc.communicate()
            # Archive member names are not guaranteed to be valid UTF-8;
            # replace undecodable bytes instead of losing the whole listing.
            listings.append(out_tmp.decode(errors='replace'))
            if verbose:
                print(out_tmp)
        except Exception as e:
            print('[ERROR] Failed %s: %s' % (file, e))
            if verbose:
                traceback.print_exc()
        tested.append(file + '\n')
    save_tested(''.join(tested))
    return ''.join(listings).splitlines()


# Detect fake nzbs. Returns True if a fake is detected.
def detect_fake(name, dir):
    """Return True if the content of *dir* looks like a fake.

    *name* is accepted for interface compatibility and is not used.
    """
    # Fake detection:
    # If download contains media files AND executables we consider it a fake.
    # QUEUE mode (called during download and before unpack):
    #  - if directory contains archives list their content and use the file
    #    names for detection;
    # POST-PROCESSING mode (called after unpack):
    #  - scan directory content and use file names for detection;
    #  - TODO: check video files using ffprobe.
    #
    # It's actually not necessary to check the mode (QUEUE or POST-PROCESSING), we always do all checks.
    dir = os.path.normpath(dir)
    filelist = []
    dirlist = []
    for entry in os.listdir(dir):
        path = os.path.join(dir, entry)
        if os.path.isfile(path):
            filelist.append(entry)
        elif os.path.isdir(path):
            dirlist.append(path)

    filelist.extend(list_all_rars(dir))
    for subdir in dirlist:
        filelist.extend(list_all_rars(subdir))

    if contains_media(filelist) and contains_executable(filelist):
        print('[WARNING] Download has media files and executables')
        # Remove info about banned extension from pp-parameter "NZBPR_PPSTATUS_FAKEBAN"
        # (in a case it was saved previously)
        print('[NZB] NZBPR_PPSTATUS_FAKEBAN=')
        return True

    banned_ext = contains_banned_media(filelist)
    if banned_ext != '':
        print('[WARNING] Download contains banned extension ' + banned_ext)
        # Save details about banned extension in pp-parameter "NZBPR_PPSTATUS_FAKEBAN"
        print('[NZB] NZBPR_PPSTATUS_FAKEBAN=' + banned_ext)
        return True

    return False


# Establish connection to NZBGet via RPC-API
def connect_to_nzbget():
    """Return an XML-RPC ``ServerProxy`` for the NZBGet instance that called us."""
    # First we need to know connection info: host, port and password of NZBGet server.
    # NZBGet passes all configuration options to scripts as environment variables.
    host = os.environ['NZBOP_CONTROLIP']
    if host == '0.0.0.0':
        host = '127.0.0.1'
    port = os.environ['NZBOP_CONTROLPORT']
    # Credentials are embedded in the URL, so characters such as '@', ':' or '/'
    # must be percent-encoded or the URL is split at the wrong place.
    username = urllib.parse.quote(os.environ['NZBOP_CONTROLUSERNAME'], safe='')
    password = urllib.parse.quote(os.environ['NZBOP_CONTROLPASSWORD'], safe='')

    # Build an URL for XML-RPC requests
    xmlRpcUrl = 'http://%s:%s@%s:%s/xmlrpc' % (username, password, host, port)

    # Create remote server object
    nzbget = ServerProxy(xmlRpcUrl)
    return nzbget


# Connect to NZBGet and call an RPC-API-method without using of python's XML-RPC.
# XML-RPC is easy to use but it is slow for large amount of data
def call_nzbget_direct(url_command):
    """Call JSON-RPC method *url_command* and return the raw JSON response text."""
    # First we need to know connection info: host, port and password of NZBGet server.
    # NZBGet passes all configuration options to scripts as environment variables.
    host = os.environ['NZBOP_CONTROLIP']
    if host == '0.0.0.0':
        host = '127.0.0.1'
    port = os.environ['NZBOP_CONTROLPORT']
    username = os.environ['NZBOP_CONTROLUSERNAME']
    password = os.environ['NZBOP_CONTROLPASSWORD']

    # Building http-URL to call the method
    httpUrl = 'http://%s:%s/jsonrpc/%s' % (host, port, url_command)
    request = urllib.request.Request(httpUrl)

    authString = '%s:%s' % (username, password)
    base64string = b64encode(authString.encode()).decode("ascii")
    request.add_header("Authorization", "Basic %s" % base64string)

    # Load data from NZBGet; the context manager closes the connection
    with urllib.request.urlopen(request) as response:
        data = response.read().decode('utf-8')

    # "data" is a JSON raw-string
    return data


# Find the last rar-file in the raw "listfiles" response
def _find_last_rar(data):
    """Return ``(file_id, file_name)`` of the highest-numbered rar volume in *data*.

    *data* is the raw JSON text; it is scanned line by line for lines starting
    with '"ID" : ' and '"Filename" : "'. Returns ``(None, None)`` when no
    ".partXX.rar" or ".rXX" name is found.
    """
    # The last is the one with the highest XX in ".partXX.rar" or ".rXX"
    regex1 = re.compile(r'.*\.part(\d+)\.rar', re.IGNORECASE)
    regex2 = re.compile(r'.*\.r(\d+)', re.IGNORECASE)
    file_num = None
    file_id = None
    file_name = None
    cur_id = None

    for line in data.splitlines():
        if line.startswith('"ID" : '):
            # Strip the prefix and the trailing comma
            cur_id = int(line[7:-1])
        if line.startswith('"Filename" : "'):
            # Strip the prefix and the trailing '",'
            cur_name = line[14:-2]
            match = regex1.match(cur_name) or regex2.match(cur_name)
            if match and cur_id is not None:
                cur_num = int(match.group(1))
                if file_num is None or cur_num > file_num:
                    file_num = cur_num
                    file_id = cur_id
                    file_name = cur_name
    return file_id, file_name


# Reorder inner files for earlier fake detection
def sort_inner_files():
    """Move the last rar volume of the current nzb to the top of its file list."""
    nzb_id = int(os.environ.get('NZBNA_NZBID'))

    # Building command-URL to call method "listfiles" passing three parameters: (0, 0, nzb_id)
    url_command = 'listfiles?1=0&2=0&3=%i' % nzb_id
    data = call_nzbget_direct(url_command)

    # The "data" is a raw json-string. We could use json.loads(data) to
    # parse it but json-module is slow. We parse it on our own.
    file_id, file_name = _find_last_rar(data)

    # Move the last rar-file to the top of file list
    if file_id is not None:
        print('[INFO] Moving last rar-file to the top: %s' % file_name)
        # Create remote server object
        nzbget = connect_to_nzbget()
        # Using RPC-method "editqueue" of XML-RPC-object "nzbget".
        # we could use direct http access here too but the speed isn't
        # an issue here and XML-RPC is easier to use.
        nzbget.editqueue('FileMoveTop', 0, '', [file_id])
    else:
        print('[INFO] Skipping sorting since could not find any rar-files')


# Remove current and any old temp files
def clean_up():
    """Delete tested-files lists that no longer belong to a queued nzb.

    Does nothing when the temp folder was never created. When the folder
    holds more than one file, NZBGet is asked for the queued nzb ids and
    files named after those ids are kept; the file of the current nzb
    (``NZBPP_NZBID``) is always removed.
    """
    nzb_id = os.environ.get('NZBPP_NZBID')
    temp_folder = os.environ.get('NZBOP_TEMPDIR') + '/FakeDetector'

    # The folder only exists once a tested-files list has been written;
    # this function is also reached on early exits before that happens.
    if not os.path.isdir(temp_folder):
        return

    nzbids = []
    files = os.listdir(temp_folder)

    if len(files) > 1:
        # Create the list of nzbs in download queue
        data = call_nzbget_direct('listgroups?1=0')
        # The "data" is a raw json-string. We could use json.loads(data) to
        # parse it but json-module is slow. We parse it on our own.
        for line in data.splitlines():
            if line.startswith('"NZBID" : '):
                cur_id = int(line[10:-1])
                nzbids.append(str(cur_id))

    old_temp_files = list(set(files) - set(nzbids))
    if nzb_id in files and nzb_id not in old_temp_files:
        old_temp_files.append(nzb_id)

    for temp_id in old_temp_files:
        temp_file = temp_folder + '/' + str(temp_id)
        try:
            print('[DETAIL] Removing temp file ' + temp_file)
            os.remove(temp_file)
        except OSError:
            print('[ERROR] Could not remove temp file ' + temp_file)


# Script body
def main():
    """Run start-up checks, optionally reorder inner files, then detect fakes."""
    # Globally define directory for storing list of tested files
    global tmp_file_name

    # Do start up check
    start_check()

    # Depending on the mode in which the script was called (queue-script
    # or post-processing-script) a different set of parameters (env. vars)
    # is passed. They also have different prefixes:
    #  - NZBNA_ in queue-script mode;
    #  - NZBPP_ in pp-script mode.
    Prefix = 'NZBNA_' if 'NZBNA_EVENT' in os.environ else 'NZBPP_'

    # Read context (what nzb is currently being processed)
    Directory = os.environ[Prefix + 'DIRECTORY']
    NzbName = os.environ[Prefix + 'NZBNAME']

    # Directory for storing list of tested files
    tmp_file_name = os.environ.get('NZBOP_TEMPDIR') + '/FakeDetector/' + os.environ.get(Prefix + 'NZBID')

    # When nzb is added to queue - reorder inner files for earlier fake detection.
    # Also it is possible that nzb was added with a category which doesn't have
    # FakeDetector listed in the PostScript. In this case FakeDetector was not called
    # when adding nzb to queue but it is being called now and we can reorder
    # files now.
    event = os.environ.get('NZBNA_EVENT')
    if event == 'NZB_ADDED' or \
            (event == 'FILE_DOWNLOADED' and
             os.environ.get('NZBPR_FAKEDETECTOR_SORTED') != 'yes'):
        print('[INFO] Sorting inner files for earlier fake detection for %s' % NzbName)
        sys.stdout.flush()
        sort_inner_files()
        print('[NZB] NZBPR_FAKEDETECTOR_SORTED=yes')
        if event == 'NZB_ADDED':
            sys.exit(POSTPROCESS_NONE)

    print('[DETAIL] Detecting fake for %s' % NzbName)
    sys.stdout.flush()

    if detect_fake(NzbName, Directory):
        # A fake is detected
        #
        # Add post-processing parameter "PPSTATUS_FAKE" for nzb-file.
        # Scripts running after fake detector can check the parameter like this:
        # if os.environ.get('NZBPR_PPSTATUS_FAKE') == 'yes':
        #     print('Marked as fake by another script')
        print('[NZB] NZBPR_PPSTATUS_FAKE=yes')

        # Special command telling NZBGet to mark nzb as bad. The nzb will
        # be removed from queue and become status "FAILURE/BAD".
        print('[NZB] MARK=BAD')
    else:
        # Not a fake or at least doesn't look like a fake (yet).
        #
        # When nzb is downloaded again (using "Download again" from history)
        # it may have been marked by our script as a fake. Since now the script
        # doesn't consider nzb as fake we remove the old marking. That's
        # of course a rare case that someone will redownload a fake but
        # at least during debugging of fake detector we do that all the time.
        if os.environ.get('NZBPR_PPSTATUS_FAKE') == 'yes':
            print('[NZB] NZBPR_PPSTATUS_FAKE=')

    print('[DETAIL] Detecting completed for %s' % NzbName)
    sys.stdout.flush()

    # Remove temp files in PP
    if Prefix == 'NZBPP_':
        clean_up()


# Run only when executed as a script (which is how NZBGet starts it),
# so the functions above can be imported without side effects.
if __name__ == '__main__':
    # Execute main script function
    main()

    # All OK, returning exit status 'POSTPROCESS_SUCCESS' (int <93>) to let NZBGet know
    # that our script has successfully completed (only for pp-script mode).
    sys.exit(POSTPROCESS_SUCCESS)