I'm trying to put together a script that I hope will be able to do general purpose graphical plotting of newick trees, using ggtree
, since I like the graphics it produces - but I'm having a real battle with the scaling. All the trees represent RAxML created phylogenies of various orthologous genes from multiple operons, but they have different divergence, so the branch lengths change.
Consequently, I can't get a 'one-size-fits' all solution, even though the tip labels etc are all roughly the same length and so on.
Any one have any ideas/solutions?
The online advice is to manipulate the plot with xlim()
, but I can't get a value that works for everything so there must be a different approach?
EDIT I realise I probably should have provided some input data examples:
(PLT_U3:0.15318452419212538751,(((PAK_lumt:0.01321781409483185898,PAU_lumt:0.01604799170721057933)100:0.71562186374321934412,(PAK_pnf:0.02747409309026784680,PAU_pnf:0.04362199467390272950)100:0.50691772839805349093)100:0.45495607804840743071,((PLT_U2:0.07471966037279678674,(PAK_U2:0.04369444871016458370,(PAU_U4:0.04943960602593169135,PLT_U4:0.06992088986360529834)100:0.07699505948556126245)93:0.04296198668063311299)100:0.13609932089090265306,((PLT_cif:0.18031733116896464519,(PAK_cif:0.06553534447528822082,PAU_cif:0.07655342206635600000)100:0.12470806262593311931)100:0.05721370618688302601,(PLT_lopT:0.16602726877903850600,(PAU_lopT:0.05885402538087888824,PAK_lopT:0.05589448651685637731)100:0.12929347200668039886)100:0.37678937324769112838)83:0.05216373664865197463)13:0.03885961774638615335)100:0.03948590227624135252,PLT_U1:0.13780419834695847858);
(PAU_U4:0.06792319190492959735,(((PLT_U1:0.06195477762918412401,(PLT_U2:0.12045592724342514546,PAK_U2:0.21265815856088915448)100:0.52004517602278754751)5:0.15711720420449210023,PLT_cif:0.40116484314823280810)2:0.14333936648972814276,(((PLT_U3:0.42213854217590812690,PAU_pnf:0.17943312479099476908)100:0.76956668636749692158,PAK_pnf:0.15093876257250896100)2:0.09839881588263937884,((PAU_lumt:0.02288901001491689194,PAK_lumt:0.06104256543803018159)100:1.33083119633543733862,(PAK_cif:0.08817231165327119036,PAU_cif:0.09701531482354242009)100:0.11546649406117621972)1:0.19547073829905142750)0:0.08120010981800158956)100:0.07680997452514500001,PLT_U4:0.11610797908874445628);
(PAK_lopT:0.06651391671394939198,(PLT_lopT:0.18187634938920854699,((PLT_U3:0.23251798697175060648,((PAU_pnf:0.15496138724255065222,PAK_pnf:1.99528346250976329479)100:0.16686160081731740701,(PLT_U1:0.10461097884900900923,(PLT_U2:0.15555288738714362351,((PAK_U2:0.05113324204590339456,PLT_U4:0.08583546436225322762)77:0.01819158172459399425,PAU_U4:0.06539836169384372067)100:0.04799724061637739014)97:0.03888003859528409156)100:0.04785048005115901532)34:0.05052513178379124809)100:0.10254321566614935102,(PLT_cif:0.21706977119981013535,(PAK_cif:0.07257443179971820313,PAU_cif:0.07776709744327382767)100:0.10465000377441940893)100:0.18289985338304795559)100:0.06576728319168027859)100:0.10247061375837905606,PAU_lopT:0.05301219664966791423);
(((((PAU_cif:0.05922767792834276318,PAK_cif:0.05837720359630504952)94:0.09005527941638312439,((PAK_pnf:0.03460739505444795916,PAU_pnf:0.04879912846622813660)100:0.70674005938148853900,(PAU_lumt:0.03843455712248564082,PAK_lumt:0.00000165330215712699)100:2.16535995657618762777)0:0.37963937475336517746)1:0.01926963290101234988,PLT_cif:0.15134110397352903976)85:0.03443561230711384563,((PLT_U3:0.12913158017970277625,PLT_U1:0.11936749469565216542)100:0.03722418272442529208,((PLT_U4:0.04803350624910976419,(PAU_U4:0.05536155172508074040,PAK_U2:0.06178731880755951311)99:0.02561554572124531692)99:0.04254790111430536287,PLT_U2:0.11102863495981107889)99:0.04018563217866758658)71:0.01757929374071711542)100:0.03418339622670547168,(PAK_lopT:0.03878402207343702862,PAU_lopT:0.04279941666551179830)100:0.12608713896756232331,PLT_lopT:0.12952569778469022466);
(PAK_cif:0.07701941372805862218,((PLT_lopT:0.10022102575854190121,(PAK_lopT:0.06343982481436383214,PAU_lopT:0.04801968433327514357)57:0.01153274592106461049)77:0.07460548640965991574,(PLT_cif:0.29901813352443618044,(((PLT_U3:0.18702007738801146308,PLT_U1:0.10133584975349224644)96:0.05544310777994415629,((PAK_U2:0.13349596766321875085,(PAU_U4:0.07034515552713378750,PLT_U4:0.05587308028495038131)85:0.02829774872004332809)97:0.03801275278828684240,PLT_U2:0.14164702941908283162)82:0.02774318946500672553)89:0.03072610076255263140,((PAK_pnf:0.05907639105173269345,PAU_pnf:0.02931097601141012532)100:0.67413366446695455192,(PAU_lumt:0.02506753031159318287,PAK_lumt:0.02554038225094877601)100:1.69160259242493538068)2:0.39855273251040201909)28:0.05609084512611253737)57:0.03715138874461203916)96:0.12628685745219253578,PAU_cif:0.06674891883198276477);
Here's the script:
#!/usr/bin/env Rscript
suppressMessages(library("ggplot2"))
suppressMessages(library("argparse"))
suppressMessages(library("ape"))
suppressMessages(library("Biostrings"))
suppressMessages(library("ggtree"))
suppressMessages(library("tools"))
options(warn=-1)
# Parse commandline arguments
parser <- ArgumentParser()
parser$add_argument('-t',
'--tree',
action='store',
required=TRUE,
help="Tree file for plotting with ggtree in Newick format.")
parser$add_argument('-v',
'--verbose',
action='count',
default=0,
help='Verbose behaviour, printing up to 3 levels of different output. Default off.')
parser$add_argument('-o',
'--outfile',
action='store',
default='None',
help='Filename and path to save the image to. Defaults to the same as the infile with .png appended')
args <-parser$parse_args()
# Reassign to standard variables to pass through functions
tree <- args$tree
outfile <- args$outfile
verbose <- args$verbose
# Function to synthesise an output path/name
getOutfile <- function(tree){
outfile <- paste(paste(dirname(tree),
file_path_sans_ext(basename(tree)),
sep="/"),
"png",
sep = ".")
return(outfile)
}
# If no output file specified, redefine outfile to infile + ".png"
if (outfile == 'None'){
outfile <- getOutfile(tree)
}
if (verbose > 0){
cat("\n", "Reading in tree:", "\n")
cat("=================", "\n")
cat(tree, "\n")
}
# Import tree data
tree.obj <- read.tree(args$tree)
if (verbose > 1){
cat("\n", "Your data structure:", "\n")
cat("=====================", "\n")
print.phylo(tree.obj, printlen = length(tree.obj$tip.label))
cat("\n")
write.tree(tree.obj)
}
# Plot and customise tree space
tree.img <- ggtree(tree.obj, size=0.75) + xlim(NA, 2)
# Add tip labels (right aligned)
tree.img <- tree.img + geom_tiplab(align=T,
linesize = 0.3,
offset = 0.1)
# size = 8)
# Add bootstraps/node labels
tree.img <- tree.img + geom_text2(size=5,
aes(subset = !isTip, label=label),
nudge_x = 0.02)
# Add tip shapes
tree.img <- tree.img + geom_tippoint(color='firebrick')
# Add scalebar
tree.img <- tree.img + geom_treescale(fontsize=5,
linesize = 2,
offset = -1)
# Make transparent
tree.img <- tree.img + theme(plot.background = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
legend.background = element_blank(),
plot.margin = unit(c(0.1,0.1,0.1,0.1),"cm"))
tree.img
if (verbose > 0){
cat("\n", "Saving tree image to:", "\n")
cat("======================", "\n")
cat(outfile, "\n")
}
ggsave(outfile,
plot = tree.img,
height = 8,
width = 12,
units = "in",
dpi = 1200,
bg = "transparent")
cat("All done.", "\n")
But as you can see from a selection of images here, the trees are all over the place. The first 2 (of this random selection) have done OK with this axis setting, but the 3rd is awful.
https://ibb.co/mb4VmF https://ibb.co/cBSi6F https://ibb.co/g7n5La
Does this work for an individual tree? I'm not sure if I made it unclear in the OP, I'm passing in a single tree at a time from the commandline.
Adding
+ facet_wrap(~.id)
to the firstggtree(tree.obj)
line gives me the following error:Error in combine_vars(data, params$plot_env, vars, drop = params$drop) : At least one layer must contain all variables used for facetting Calls: <Anonymous> ... f -> <Anonymous> -> f -> <Anonymous> -> combine_vars Execution halted