# Install Docker
# Please follow the instructions at https://docs.docker.com/install/
# or try the local install with rpm packages
wget -c https://ref-db.edgebioinformatics.org/EDGE/Docker/docker-ce-dependencies-rpms-for-centos.tgz
tar xvzf docker-ce-dependencies-rpms-for-centos.tgz
sudo yum localinstall *rpm

# Download the docker images
wget -c https://ref-db.edgebioinformatics.org/EDGE/Docker/edge_24_1_ubuntu18_20231115.tar.gz
wget -c https://ref-db.edgebioinformatics.org/EDGE/Docker/edge_ubuntu_mysql_docker.tgz

# Load the saved Docker images
docker load < edge_24_1_ubuntu18_20231115.tar.gz
docker load < edge_ubuntu_mysql_docker.tgz

# Download EDGE database from web server

You will need >500GB disk space for all databases

    ## Pipeline database is ~17Gb and contains the other databases needed for EDGE
  wget -c https://ref-db.edgebioinformatics.org/EDGE/dev/edge_dev_pipeline_databases.tgz

  ## BWA index is ~41Gb and contains the databases for bwa taxonomic identification pipeline
  wget -c https://ref-db.edgebioinformatics.org/EDGE/dev/edge_dev_bwa_index.tgz

  ## HOST genomes BWA index is ~41Gb for Host removal, including human, bacteria, phiX, viruses, invertebrate vectors of human pathogens
  wget -c https://ref-db.edgebioinformatics.org/EDGE/dev/edge_dev_HostIndex.tgz

  ## NCBI Genomes is ~21Gb and contains the full genomes for prokaryotes and some viruses
  wget -c https://ref-db.edgebioinformatics.org/EDGE/dev/edge_dev_NCBI_genomes.tgz

  ## GOTTCHA database is ~16Gb and contains the custom databases for the GOTTCHA taxonomic identification pipeline
  wget -c https://ref-db.edgebioinformatics.org/EDGE/dev/edge_dev_GOTTCHA_db.tgz

  ## NT database is ~25Gb and contains the NCBI nt database for contig identification
  wget -c https://ref-db.edgebioinformatics.org/EDGE/dev/edge_dev_nt_20160426.tgz

  ## ShortBRED database is ~27Mb and contains the databases used by ShortBRED for virulence factors and read based antibiotic resistance analysis
  wget -c https://ref-db.edgebioinformatics.org/EDGE/dev/edge_dev_ShortBRED_Database.tgz

  ## Diamond database is ~16Gb and contains the databases from RefSeq for protein based taxonomic identification
  wget -c https://ref-db.edgebioinformatics.org/EDGE/dev/edge_dev_diamond_db.tgz

 ## MetaPhlAn4 database is a 14Gb file and contains the databases used for the MetaPhlAn4 taxonomic identification pipeline
 wget -c https://ref-db.edgebioinformatics.org/EDGE/dev/edge_dev_metaphlan4DB.tgz

 ## GOTTCHA2 database is a 38Gb file and contains the custom databases for the GOTTCHA2 taxonomic identification pipeline
 wget -c https://ref-db.edgebioinformatics.org/EDGE/dev/edge_GOTTCHA2_db_20190729.tgz

 ## Kraken2 database is a 39Gb file and contains the databases used for the Kraken2 taxonomic identification pipeline
 wget -c https://ref-db.edgebioinformatics.org/EDGE/dev/edge_Kraken2_db_20211216.tgz

 ## Centrifuge database is a 20G file and contains the databases used for the Centrifuge taxonomic identification pipeline
 wget -c https://ref-db.edgebioinformatics.org/EDGE/dev/edge_Centrifuge_db_20200329.tgz

 ## PanGIA database is a 35G file for the PanGIA taxonomic identification pipeline
 wget -c https://ref-db.edgebioinformatics.org/EDGE/dev/edge_dev_PanGIA_db.tgz

 ## MICCR database is 48GB and contains the databases used for the contig taxonomic identification pipeline
 wget -c https://ref-db.edgebioinformatics.org/EDGE/dev/edge_ContigTax_db_20190114.tgz

 ## CheckM database is 275MB and contains the databases used for the metagenome binned contig quality assessment.
 wget -c https://ref-db.edgebioinformatics.org/EDGE/dev/edge_checkM_db_20190213.tgz

 ## Qiime2 database is 1.4GB and contains the 16S, 18S and ITS databases.
 wget -c  https://ref-db.edgebioinformatics.org/EDGE/dev/edge_qiime2_db_20230719.tgz

 ## AntiSmash database is 3.2GB and contains the pfam, resfam, tigrfam and clusterblast databases for antiSMASH version 6
 wget -c https://ref-db.edgebioinformatics.org/EDGE/dev/edge_dev_AntiSmash6.tgz

 (Optional)
 ## Other Host bwa index is ~18Gb for host removal, including pig, sheep, cow, monkey, hamster, and goat.
 wget -c https://ref-db.edgebioinformatics.org/EDGE/dev/edge_dev_otherHostIndex.tgz

 ## For machines with < 32Gb memory, we suggest using the smaller BWA index (~14Gb), which contains the databases for the bwa taxonomic identification pipeline
 wget -c https://ref-db.edgebioinformatics.org/EDGE/dev/edge_dev_bwa_mini_index.tgz

 # Decompress each downloaded .tgz file (tar -xzvf) so it can be used later


# Usage

   $ docker create --name mysql_data --volume /var/lib/mysql  bioedge/edge_ubuntu_mysql
   
   $ docker run -d --privileged=true --security-opt "seccomp:unconfined" \
    --cap-add=SYS_ADMIN --cap-add=SYS_PTRACE  \
    --volumes-from mysql_data \
    -v /path/to/database:/home/edge/database \
    -v /path/to/EDGE_output:/home/edge/EDGE_output \
    -v /path/to/EDGE_input:/home/edge/EDGE_input \
    -v /path/to/EDGE_report:/home/edge/EDGE_report \
    -p 80:80 -p 8080:8080 --name edge bioedge/edge_24_1_ubuntu18:20231115

Wait a few seconds for the Docker container to start the EDGE service, then open http://localhost/ in a browser to start using EDGE.

* The -v /path/to/database:/home/edge/database mounts the database obtained from the above download step.
* The -v /path/to/EDGE_input:/home/edge/EDGE_input mounts the EDGE input directory structure (obtained from the git clone above) to persist the input/upload files/user projects in the host.
* The -v /path/to/EDGE_output:/home/edge/EDGE_output mounts the EDGE output directory to persist the output files in the host.
* The -v /path/to/EDGE_report:/home/edge/EDGE_report mounts the EDGE report directory to persist the report files in the host.
* The -p host:container options bind host ports 80 and 8080 to ports 80 and 8080 inside the container. You can change the host-side ports (the first number in each pair) to fit your host system requirements.

###  Default credentials

* EDGE user: admin_docker@my.edge/Admin1234!

* For security, you should update the credentials if the server will be used by others or exposed to the public.

### Note
* This image is built on top of the official Ubuntu 18.04.2 LTS Base Image, and is officially supported on Docker Engine version 18.09.2.
* The user management interface can be accessed at http://localhost:8080/userManagement when the host port is 8080.

### Commands for checking status and error log

* Check the mariadb status in container:
    $ docker exec edge service mysql status

where "edge" is the container name given with the --name flag of `docker run`.

* Check the user management system service status:
    $ docker exec edge service tomcat7 status

* For the Apache web server status and log: 
    $ docker exec edge service apache2 status 
    $ docker exec edge tail /var/log/apache2/error.log
    $ docker exec edge tail /var/log/apache2/access.log

### Citation
Po-E Li, Chien-Chi Lo, Joseph J. Anderson, Karen W. Davenport, Kimberly A. Bishop-Lilly, Yan Xu, Sanaa Ahmed, Shihai Feng, Vishwesh P. Mokashi, Patrick S.G. Chain; Enabling the democratization of the genomics revolution with a fully integrated web-based bioinformatics platform, Nucleic Acids Research, Volume 45, Issue 1, 9 January 2017, Pages 67–80, https://doi.org/10.1093/nar/gkw1027

### Contact Info
Chien-Chi Lo:  <chienchi@lanl.gov>
Paul Li: <po-e@lanl.gov>