#!/usr/bin/env python3 """ This script scans a web server's directory (based on `servers` dict), and saves a JSON file that includes all supported data files in this directory. The output JSON file can be loaded directly into Washington University Genome Browser (WUGB). Two arguments are required by the script: * genome type, such as `hg19` or `hg38` * directory name Two new files will be generated in the input directory: * `wugb.json`: JSON file for WUGB * `wugb_url.txt`: the URL of WUGB (also shown at the end of this program) """ import json import os import socket import sys # Web servers config servers = { # plutus 'plutus': { 'www_root': '/mnt/data1/www/html/', 'main_url': 'http://faryabi05.med.upenn.edu/', }, # simurgh 'simurgh': { 'www_root': '/mnt/data0/www/html/', 'main_url': 'http://faryabi17.pmacs.upenn.edu/', }, } # Types of data that will be rendered in WashU Genome Browser data_types = { '.bw': 'bigwig', '.cool': 'cool', '.hic': 'hic', } # Main URL of WUGB WUGB_URL = "http://epigenomegateway.wustl.edu/browser/" # Output filenames JSON_FILENAME = "wugb.json" URL_FILENAME = "wugb_url.txt" def syntax(): print("Syntax:") print(" make_wugb_json.py [genome_type] [data_directory]\n") print("For example:") print(" make_wugb_json.py hg38 my_data") def chk_data_dir(dir_name, data_root): """ Check whether: * `dir_name` is a directory * `dirname` is a sub-directory of `data_root` Return the absolute path of `dir_name`. """ abs_path = os.path.abspath(dir_name) if not os.path.isdir(abs_path): print(f"ERROR: '{dir_name}' is not a directory") sys.exit(2) if not abs_path.startswith(data_root): print(f"ERROR: '{dir_name}' is not located in '{data_root}'") sys.exit(3) return abs_path # Main if __name__ == '__main__': if len(sys.argv) != 3: syntax() sys.exit(1) # Exit if the server is not found in `servers` server_name = socket.gethostname() if server_name not in servers: print(f"ERROR: '{server_name}' not supported") sys.exit(3) www_root = servers[server_name]['www_root'] main_url = servers[server_name]['main_url'] genome_type = sys.argv[1] data_dir = sys.argv[2] abs_data_dir = chk_data_dir(data_dir, www_root) wugb_hub = list() for root, _, files in os.walk(abs_data_dir): # Make sure `root` is accessible by all users # os.chmod(root, 0o755) for f in files: # Skip the files whose types are not supported _, f_type = os.path.splitext(f) if f_type not in data_types: continue abs_file_path = os.path.join(root, f) sub_url = os.path.relpath(abs_file_path, start=www_root) data_url = main_url + sub_url hub_entry = dict() hub_entry["type"] = data_types[f_type] hub_entry["url"] = data_url # hub_entry["name"] = f.split('.')[0] hub_entry["name"] = f # Tim wants to display the full name hub_entry["showOnHubLoad"] = True # Special options for `cool` and `hic` data types, see: # https://epigenomegateway.readthedocs.io/en/latest/datahub.html#example-hic-track if f_type in ['cool', 'hic']: hub_entry['options'] = { 'displayMode': 'arc', } wugb_hub.append(hub_entry) # add entry to hub # Create `wugb.json` in `data_dir` # json_path = os.path.join(abs_data_dir, JSON_FILENAME) json_path = "/mnt/data1/www/html/Tim/wugb.json" with open(json_path, "w") as ofh: json.dump(wugb_hub, ofh, indent=2) ofh.write("\n") # end the JSON file with a newline character json_url = main_url + os.path.relpath(json_path, start=www_root) # Create `wugb_url.txt` in `bw_dir`: # url_path = os.path.join(abs_data_dir, URL_FILENAME) url_path = "/mnt/data1/www/html/Tim/wugb_url.txt" url_str = f"{WUGB_URL}?genome={genome_type}&hub={json_url}" with open(url_path, "w") as ofh: ofh.write(f"{url_str}\n") # Print `json_path` and `url_str` on stdout print(f"JSON file is saved as: {json_path}") print(f"View it at: {url_str}") print(f"URL is saved as: {url_path}")