reduced number of dependencies

zachary-foster · zachary-foster · commit f84ff97eedea · 2021-06-22T19:55:37.000-07:00
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -20,17 +20,11 @@ Imports:
     stringr,
     ggplot2,
     igraph,
-    scales,
     grid,
     taxize,
     seqinr,
-    reshape2,
-    zoo,
-    traits,
-    RColorBrewer,
     RCurl,
     ape,
-    reshape,
     stats,
     grDevices,
     utils,
@@ -39,26 +33,23 @@ Imports:
     magrittr,
     readr,
     rlang,
-    phylotate,
     ggfittext,
     vegan,
-    ggrepel,
     cowplot,
     GA,
     Rcpp,
     crayon,
-    svglite,
-    viridisLite,
     tibble,
-    R6,
-    jsonlite
+    R6
 Suggests:
     knitr,
     rmarkdown,
     testthat,
     zlibbioc,
     BiocManager,
     phyloseq,
+    phylotate,
+    traits,
     biomformat
 VignetteBuilder: knitr
 RoxygenNote: 7.1.1
diff --git a/NAMESPACE b/NAMESPACE
@@ -238,8 +238,6 @@ export(write_rdp)
 export(write_silva_fasta)
 export(write_unite_general)
 export(zero_low_counts)
-import(R6)
-import(jsonlite)
 importFrom(Rcpp,sourceCpp)
 importFrom(dplyr,contains)
 importFrom(dplyr,ends_with)
diff --git a/R/heat_tree--mapping.R b/R/heat_tree--mapping.R
@@ -1,7 +1,7 @@
 #' Rescale numeric vector to have specified minimum and maximum.
 #' 
 #' Rescale numeric vector to have specified minimum and maximum, but allow for hard boundaries.
-#' Light wrapper for scales::rescale
+#' It is a slightly modified version of scales::rescale, incorporating scales::zero_range, both by Hadley Wickham used under the conditions of the MIT license.
 #' 
 #' @param x values to rescale
 #' @param to range to scale to
@@ -11,7 +11,38 @@
 #' @keywords internal
 rescale <- function (x, to = c(0, 1), from = range(x, na.rm = TRUE, finite = TRUE), hard_bounds = TRUE) 
 {
-  result <- scales::rescale(x, to, from)
+  # COPIED FROM scales::zero_range by Hadley Wickham
+  zero_range <- function(x, tol = 1000 * .Machine$double.eps) {
+    if (length(x) == 1) return(TRUE)
+    if (length(x) != 2) stop("x must be length 1 or 2")
+    if (any(is.na(x))) return(NA)
+    
+    # Special case: if they are equal as determined by ==, then there
+    # is zero range. Also handles (Inf, Inf) and (-Inf, -Inf)
+    if (x[1] == x[2]) return(TRUE)
+    
+    # If we reach this, then x must be (-Inf, Inf) or (Inf, -Inf)
+    if (all(is.infinite(x))) return(FALSE)
+    
+    # Take the smaller (in magnitude) value of x, and use it as the scaling
+    # factor.
+    m <- min(abs(x))
+    
+    # If we get here, then exactly one of the x's is 0. Return FALSE
+    if (m == 0) return(FALSE)
+    
+    # If x[1] - x[2] (scaled to 1) is smaller than tol, then return
+    # TRUE; otherwise return FALSE
+    abs((x[1] - x[2]) / m) < tol
+  }
+  
+  # COPIED FROM scales::rescale by Hadley Wickham
+  if (zero_range(from) || zero_range(to)) {
+    return(ifelse(is.na(x), NA, mean(to)))
+  }
+  result <- (x - from[1]) / diff(from) * diff(to) + to[1]
+  
+  # Hard bound implementations 
   if (hard_bounds) {
     result[result > max(to)] <- max(to)
     result[result < min(to)] <- min(to)
@@ -64,7 +95,9 @@ apply_color_scale <- function(values, color_series, interval = NULL, no_color_in
 #' 
 #' @export
 quantative_palette <- function() {
-  return(c("#BBBBBB", rev(viridisLite::viridis(7, begin = .4, end = .9))))
+  # Hard-coded so viridisLite is not a dependency; produced with: c("#BBBBBB", rev(viridisLite::viridis(7, begin = .4, end = .9)))
+  return(c("#BBBBBB", "#BBDF27FF", "#85D44AFF", "#54C568FF", "#2FB47CFF", 
+           "#1FA188FF", "#228C8DFF", "#2A788EFF"))
 }
 
 
@@ -79,7 +112,10 @@ quantative_palette <- function() {
 #' 
 #' @export
 qualitative_palette <- function() {
-  return(c(RColorBrewer::brewer.pal(9, "Set1"), RColorBrewer::brewer.pal(9, "Pastel1")))
+  # Hard-coded so RColorBrewer is not a dependency; produced with: c(RColorBrewer::brewer.pal(9, "Set1"), RColorBrewer::brewer.pal(9, "Pastel1"))
+  return(c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#FFFF33", 
+           "#A65628", "#F781BF", "#999999", "#FBB4AE", "#B3CDE3", "#CCEBC5", 
+           "#DECBE4", "#FED9A6", "#FFFFCC", "#E5D8BD", "#FDDAEC", "#F2F2F2"))
 }
 
 #' The default diverging color palette
diff --git a/R/heat_tree--vertex_size.R b/R/heat_tree--vertex_size.R
@@ -17,9 +17,9 @@
 molten_dist <- function(x, y) {
   data <- as.matrix(stats::dist(cbind(x, y)))
   data[!lower.tri(data)] <- NA
-  data <- reshape2::melt(data)
-  names(data) <- c("index_1", "index_2", "distance")
-  data[!is.na(data$distance), ]    
+  # Melt to long form (index_1 = row, index_2 = column) in column-major order, vectorised instead of one R call per cell
+  molten_data <- data.frame(index_1 = as.vector(row(data)), index_2 = as.vector(col(data)), distance = as.vector(data))
+  molten_data[!is.na(molten_data$distance), ]    
 }
 
 #===================================================================================================
diff --git a/R/old_taxa--package.R b/R/old_taxa--package.R
@@ -208,7 +208,6 @@
 #'   Checkout the vignette
 #'   (`browseVignettes("taxa")`) for detailed introduction and examples.
 #'
-#' @import R6 jsonlite
 #' @name taxa-package
 #' @docType package
 #' @keywords package
diff --git a/R/parsers.R b/R/parsers.R
@@ -399,6 +399,9 @@ parse_qiime_biom <- function(file, class_regex = "(.*)",
 #' 
 #' @export
 parse_newick <- function(file = NULL, text = NULL) {
+  # Check that the "phylotate" package has been installed
+  check_for_pkg("phylotate")
+  
   # Check that `file` and `text` and `table` are not used together
  if (sum(c(is.null(file), is.null(text))) != 1) {
     stop(paste0('Either "file" or "text" must be supplied, but not both.'))
diff --git a/R/sequence_download.R b/R/sequence_download.R
@@ -1,36 +1,63 @@
 #' Download representative sequences for a taxon
-#' 
-#' Downloads a sample of sequences meant to evenly capture the diversity of a given taxon.
-#' Can be used to get a shallow sampling of vast groups. 
-#' \strong{CAUTION:} This function can make MANY queries to Genbank depending on arguments given and
-#' can take a very long time. 
-#' Choose your arguments carefully to avoid long waits and needlessly stressing NCBI's servers.
-#' Use a downloaded database and a parser from the \code{taxa} package when possible.
-#' 
-#' @param name (\code{character} of length 1) The taxon to download a sample of sequences for.
-#' @param id (\code{character} of length 1) The taxon id to download a sample of sequences for.
-#' @param target_rank (\code{character} of length 1) The finest taxonomic rank at which
-#'   to sample. The finest rank at which replication occurs. Must be a finer rank than 
-#'   \code{taxon}. 
-#' @param min_counts (named \code{numeric}) The minimum number of sequences to download for each
-#'   taxonomic rank. The names correspond to taxonomic ranks. 
-#' @param max_counts (named \code{numeric}) The maximum number of sequences to download for each
-#'   taxonomic rank. The names correspond to taxonomic ranks. 
-#' @param interpolate_min (\code{logical}) If \code{TRUE}, values supplied to \code{min_counts}
-#'   and \code{min_children} will be used to infer the values of intermediate ranks not
-#'   specified. Linear interpolation between values of specified ranks will be used to determine
-#'   values of unspecified ranks.
-#' @param interpolate_max (\code{logical}) If \code{TRUE}, values supplied to \code{max_counts}
-#'   and \code{max_children} will be used to infer the values of intermediate ranks not
-#'   specified. Linear interpolation between values of specified ranks will be used to determine
-#'   values of unspecified ranks.
-#' @param min_children (named \code{numeric}) The minimum number sub-taxa of taxa for a given
-#' rank must have for its sequences to be searched. The names correspond to taxonomic ranks. 
-#' @param max_children (named \code{numeric}) The maximum number sub-taxa of taxa for a given
-#' rank must have for its sequences to be searched. The names correspond to taxonomic ranks.
-#' @param verbose (\code{logical}) If \code{TRUE}, progress messages will be printed.
-#' @inheritParams  traits::ncbi_searcher
-#' 
+#'
+#' Downloads a sample of sequences meant to evenly capture the diversity of a
+#' given taxon. Can be used to get a shallow sampling of vast groups.
+#' \strong{CAUTION:} This function can make MANY queries to Genbank depending on
+#' arguments given and can take a very long time. Choose your arguments
+#' carefully to avoid long waits and needlessly stressing NCBI's servers. Use a
+#' downloaded database and a parser from the \code{taxa} package when possible.
+#'
+#' @param name (\code{character} of length 1) The taxon to download a sample of
+#'   sequences for.
+#' @param id (\code{character} of length 1) The taxon id to download a sample of
+#'   sequences for.
+#' @param target_rank (\code{character} of length 1) The finest taxonomic rank
+#'   at which to sample. The finest rank at which replication occurs. Must be a
+#'   finer rank than \code{taxon}.
+#' @param min_counts (named \code{numeric}) The minimum number of sequences to
+#'   download for each taxonomic rank. The names correspond to taxonomic ranks.
+#' @param max_counts (named \code{numeric}) The maximum number of sequences to
+#'   download for each taxonomic rank. The names correspond to taxonomic ranks.
+#' @param interpolate_min (\code{logical}) If \code{TRUE}, values supplied to
+#'   \code{min_counts} and \code{min_children} will be used to infer the values
+#'   of intermediate ranks not specified. Linear interpolation between values of
+#'   specified ranks will be used to determine values of unspecified ranks.
+#' @param interpolate_max (\code{logical}) If \code{TRUE}, values supplied to
+#'   \code{max_counts} and \code{max_children} will be used to infer the values
+#'   of intermediate ranks not specified. Linear interpolation between values of
+#'   specified ranks will be used to determine values of unspecified ranks.
+#' @param min_children (named \code{numeric}) The minimum number of sub-taxa a
+#'   taxon of a given rank must have for its sequences to be searched. The names
+#'   correspond to taxonomic ranks.
+#' @param max_children (named \code{numeric}) The maximum number of sub-taxa a
+#'   taxon of a given rank can have for its sequences to be searched. The names
+#'   correspond to taxonomic ranks.
+#' @param seqrange (character) Sequence range, as e.g., "1:1000". This is the
+#'   range of sequence lengths to search for. So "1:1000" means search for
+#'   sequences from 1 to 1000 characters in length.
+#' @param getrelated (logical) If TRUE, gets the longest sequences of a species
+#'   in the same genus as the one searched for. If FALSE, returns nothing if no
+#'   match found.
+#' @param fuzzy (logical) Whether to do fuzzy taxonomic ID search or exact
+#'   search. If \code{TRUE}, we use \code{xXarbitraryXx[porgn:__txid<ID>]}, but
+#'   if \code{FALSE}, we use \code{txid<ID>}. Default: \code{FALSE}
+#' @param limit (\code{numeric}) Number of sequences to search for and return.
+#'   Max of 10,000. If you search for 6000 records, and only 5000 are found, you
+#'   will of course only get 5000 back.
+#' @param entrez_query (\code{character}; length 1) An Entrez-format query to
+#'   filter results with. This is useful to search for sequences with specific
+#'   characteristics. The format is the same as the one used to search genbank.
+#'   (\url{https://www.ncbi.nlm.nih.gov/books/NBK3837/#EntrezHelp.Entrez_Searching_Options})
+#'
+#'
+#' @param hypothetical (\code{logical}; length 1) If \code{FALSE}, an attempt
+#'   will be made to not return hypothetical or predicted sequences judging from
+#'   accession number prefixes (XM and XR). This can result in less than the
+#'   \code{limit} being returned even if there are more sequences available,
+#'   since this filtering is done after searching NCBI.
+#' @param verbose (\code{logical}) If \code{TRUE}, progress messages will be
+#'   printed.
+#'   
 #' @examples
 #' 
 #' \dontrun{
@@ -54,6 +81,8 @@ ncbi_taxon_sample <- function(name = NULL, id = NULL, target_rank,
                               seqrange = "1:3000", getrelated = FALSE,
                               fuzzy = TRUE, limit = 10, entrez_query = NULL,
                               hypothetical = FALSE, verbose = TRUE) {
+  # Check that the "traits" package has been installed
+  check_for_pkg("traits")
  
   run_once <- function(name, id) {
     default_target_max <- 20
@@ -115,7 +144,7 @@ ncbi_taxon_sample <- function(name = NULL, id = NULL, target_rank,
                                                       along.with = between))
           return(NULL)
         }
-        zoo::rollapply(names(user_limits), width = 2, set_default_counts)    
+        lapply(seq_len(length(names(user_limits)) - 1), function(i) set_default_counts(names(user_limits)[i:(i+1)]))
       }
       
       # Extend boundry values to adjacent undefined values - - - - - - - - - - - - - - - - - - - - - -
@@ -142,7 +171,7 @@ ncbi_taxon_sample <- function(name = NULL, id = NULL, target_rank,
     level_min_children <- get_level_limit(min_children, 0, target_rank, interpolate_min,
                                           extend_min = TRUE)
     
-    # Recursivly sample taxon ------------------------------------------------------------------------
+    # Recursively sample taxon ------------------------------------------------------------------------
     recursive_sample <- function(id, rank, name) {
       cat("Processing '", name, "' (uid: ", id, ", rank: ", as.character(rank), ")", "\n",
           sep = "")
diff --git a/man/ncbi_taxon_sample.Rd b/man/ncbi_taxon_sample.Rd
diff --git a/man/rescale.Rd b/man/rescale.Rd