Hello Biostar Community!
I would like to ask how I can create a PCA biplot from metadata. I would like to plot the samples on a PCA plot that shows the explained variability on the x- and y-axes. In addition, the top 5 microbes that shape the position of the samples in the PCA plot should be displayed as vectors (arrows). The data are the following:
structure(list(AT_A_16 = c(0, 57260.86957, 0, 2906.832298, 0,
559.0062112, 695.6521739, 0, 0, 0, 0, 0, 0, 0, 80.74534161, 0,
6.211180124, 18.63354037, 2496.89441, 0, 0, 12.42236025, 68.32298137,
0, 0, 347.826087, 24.8447205, 6.211180124, 0, 689.4409938, 1832.298137,
6.211180124, 4472.049689, 6.211180124, 664.5962733, 0, 0, 801.242236,
0, 422.3602484), AT_A_22 = c(64.51612903, 96677.41935, 0, 903.2258065,
0, 451.6129032, 6064.516129, 0, 0, 0, 322.5806452, 96.77419355,
0, 0, 0, 0, 32.25806452, 290.3225806, 870.9677419, 0, 0, 32.25806452,
193.5483871, 0, 0, 64.51612903, 161.2903226, 0, 0, 64.51612903,
161.2903226, 32.25806452, 387.0967742, 32.25806452, 11225.80645,
0, 129.0322581, 612.9032258, 32.25806452, 96.77419355), AT_A_26 = c(126.2886598,
958.7628866, 10.30927835, 4211.340206, 12.88659794, 139.1752577,
113.4020619, 0, 5.154639175, 0, 0, 46.39175258, 0, 0, 2814.43299,
208.7628866, 12.88659794, 652.0618557, 3884.020619, 0, 0, 41.2371134,
3963.917526, 0, 0, 28.35051546, 231.9587629, 0, 0, 15.46391753,
149.4845361, 38.65979381, 260.3092784, 149.4845361, 927.8350515,
0, 438.1443299, 10023.19588, 301.5463918, 110.8247423), AT_A_32 = c(0,
781937.5, 62.5, 50, 112.5, 3050, 0, 0, 0, 0, 0, 23612.5, 0, 0,
12.5, 0, 175, 21312.5, 137.5, 0, 0, 87.5, 5150, 0, 0, 12.5, 1562.5,
0, 0, 0, 337.5, 62.5, 350, 87.5, 16250, 0, 25, 175, 1850, 250
), AT_A_36 = c(40.6504065, 37642.27642, 0, 130.0813008, 0, 780.4878049,
73.17073171, 0, 40.6504065, 0, 0, 1422.764228, 0, 0, 32.5203252,
1560.97561, 32.5203252, 8048.780488, 520.3252033, 0, 0, 414.6341463,
3788.617886, 0, 0, 73.17073171, 154.4715447, 0, 0, 48.7804878,
113.8211382, 373.9837398, 40.6504065, 601.6260163, 4601.626016,
0, 24.3902439, 560.9756098, 154.4715447, 40.6504065), AT_A_41 = c(0.898472597,
2740.34142, 0, 2872.416891, 0, 889.4878706, 1654.986523, 0, 341.4195867,
0, 0, 157.2327044, 0, 0, 13.47708895, 0, 0, 115.0044924, 516.621743,
0, 0, 26.0557053, 61.09613657, 0, 0, 2778.077269, 46.72057502,
0.898472597, 0, 315.3638814, 15371.96765, 213.836478, 2774.483378,
0.898472597, 108.7151842, 0, 44.92362983, 3868.823001, 210.2425876,
21297.39443), AT_A_46 = c(0, 0, 15.38461538, 71.79487179, 0,
119523.0769, 20.51282051, 0, 153.8461538, 0, 0, 502.5641026,
0, 0, 2466.666667, 0, 0, 1112.820513, 13353.84615, 0, 0, 87.17948718,
210.2564103, 0, 0, 194.8717949, 0, 66.66666667, 0, 0, 1184.615385,
420.5128205, 46.15384615, 123.0769231, 10.25641026, 0, 28282.05128,
61553.84615, 0, 1656.410256), AT_B_11 = c(30.3030303, 169545.4545,
0, 60.60606061, 0, 1545.454545, 0, 0, 0, 0, 0, 0, 0, 0, 303.030303,
0, 0, 0, 181.8181818, 0, 0, 0, 0, 0, 0, 0, 60.60606061, 0, 0,
0, 16393.93939, 0, 76363.63636, 121.2121212, 1666.666667, 0,
333.3333333, 242.4242424, 0, 0), AT_B_13 = c(79.13669065, 724568.3453,
0, 21.58273381, 0, 8093.52518, 14.38848921, 0, 100.7194245, 0,
0, 1683.453237, 0, 0, 755.3956835, 50.35971223, 21.58273381,
4071.942446, 338.1294964, 0, 0, 7.194244604, 158.2733813, 0,
0, 7.194244604, 194.2446043, 0, 0, 0, 2604.316547, 1417.266187,
1726.618705, 57.55395683, 5676.258993, 0, 64.74820144, 417.2661871,
223.0215827, 230.2158273), AT_B_24 = c(305.5555556, 2177000,
0, 166.6666667, 0, 111.1111111, 55.55555556, 0, 0, 0, 0, 55.55555556,
0, 0, 138.8888889, 27.77777778, 0, 0, 250, 0, 27.77777778, 27.77777778,
166.6666667, 0, 0, 83.33333333, 83.33333333, 27.77777778, 0,
0, 186027.7778, 0, 694.4444444, 972.2222222, 2000, 0, 750, 527.7777778,
27.77777778, 638.8888889), AT_B_26 = c(1197.879859, 173724.3816,
0, 127.2084806, 0, 1102.473498, 3.533568905, 0, 0, 0, 7.067137809,
28.26855124, 0, 0, 279.1519435, 14.13427562, 3.533568905, 14.13427562,
10876.32509, 0, 922.2614841, 0, 166.0777385, 0, 0, 21.20141343,
367.4911661, 0, 0, 0, 1413.427562, 91.87279152, 7989.399293,
2392.226148, 2462.897527, 0, 1385.159011, 10141.34276, 28.26855124,
215.5477032), AT_B_37 = c(150L, 2728500L, 0L, 350L, 0L, 7050L,
100L, 0L, 600L, 0L, 0L, 50L, 0L, 0L, 200L, 0L, 50L, 0L, 5300L,
0L, 50L, 0L, 0L, 0L, 0L, 50L, 50L, 0L, 0L, 0L, 141950L, 50L,
872150L, 300L, 39600L, 0L, 150L, 300L, 100L, 400L), AT_B_42 = c(0,
4941210.526, 0, 0, 0, 421.0526316, 0, 0, 947.3684211, 0, 0, 210.5263158,
0, 0, 0, 0, 105.2631579, 52.63157895, 9315.789474, 0, 0, 52.63157895,
210.5263158, 0, 0, 0, 736.8421053, 0, 0, 0, 38736.84211, 0, 126842.1053,
105.2631579, 947.3684211, 0, 0, 157.8947368, 105.2631579, 0),
BT_A_11 = c(0, 8394.230769, 0, 125, 0, 384.6153846, 0, 19.23076923,
19.23076923, 0, 0, 134.6153846, 0, 0, 0, 0, 0, 1769.230769,
471.1538462, 0, 0, 288.4615385, 9.615384615, 0, 0, 769.2307692,
500, 0, 0, 0, 750, 0, 9.615384615, 0, 0, 0, 9.615384615,
4961.538462, 0, 1009.615385), BT_A_16 = c(0, 212.7192982,
0, 0, 0, 271.9298246, 0, 4.385964912, 2.192982456, 0, 0,
416.6666667, 0, 0, 0, 0, 0, 129.3859649, 2635.964912, 0,
0, 206.1403509, 2.192982456, 0, 0, 311.4035088, 660.0877193,
0, 0, 0, 383.7719298, 0, 739.0350877, 0, 0, 0, 0, 2800.438596,
0, 171.0526316), BT_A_22 = c(28.57142857, 76857.14286, 0,
628.5714286, 0, 4857.142857, 0, 28.57142857, 0, 0, 0, 0,
0, 0, 0, 0, 0, 600, 57.14285714, 28.57142857, 0, 0, 0, 0,
0, 9685.714286, 542.8571429, 0, 0, 0, 0, 0, 228.5714286,
0, 0, 0, 0, 7685.714286, 0, 85.71428571), BT_A_26 = c(0,
8114.832536, 0, 0, 0, 363.6363636, 0, 0, 0, 0, 0, 2612.440191,
0, 0, 0, 0, 0, 4.784688995, 684.2105263, 0, 0, 57.41626794,
0, 0, 0, 1502.392344, 215.3110048, 0, 0, 0, 71.77033493,
0, 2114.832536, 0, 0, 0, 0, 1392.344498, 0, 287.0813397),
BT_A_32 = c(0, 255214.2857, 0, 0, 0, 285.7142857, 0, 71.42857143,
0, 0, 0, 142.8571429, 0, 0, 0, 0, 0, 0, 357.1428571, 142.8571429,
0, 0, 0, 0, 0, 0, 142.8571429, 0, 0, 0, 142.8571429, 0, 71.42857143,
0, 0, 0, 142.8571429, 500, 0, 142.8571429), BT_A_36 = c(0,
21454.54545, 0, 90.90909091, 0, 19909.09091, 0, 45.45454545,
0, 0, 0, 10772.72727, 0, 0, 0, 0, 0, 0, 9181.818182, 90.90909091,
0, 0, 45.45454545, 0, 0, 26454.54545, 500, 0, 0, 0, 90.90909091,
0, 16954.54545, 0, 45.45454545, 0, 90.90909091, 7045.454545,
0, 9181.818182), BT_A_41 = c(25.42372881, 203.3898305, 0,
177.9661017, 0, 8830.508475, 0, 25.42372881, 330.5084746,
0, 0, 1330.508475, 0, 0, 0, 0, 0, 220.3389831, 13449.15254,
8.474576271, 0, 0, 279.6610169, 0, 0, 1067.79661, 33.89830508,
0, 0, 0, 16.94915254, 0, 1338.983051, 0, 8.474576271, 0,
677.9661017, 1025.423729, 0, 720.3389831), BT_A_46 = c(0,
1258.547009, 0, 0, 0, 113.2478632, 0, 0, 0, 0, 0, 260.6837607,
0, 0, 0, 0, 0, 42.73504274, 363.2478632, 0, 0, 365.3846154,
0, 0, 0, 290.5982906, 282.0512821, 0, 0, 0, 427.3504274,
0, 1448.717949, 0, 0, 0, 0, 1410.25641, 0, 10.68376068),
BT_B_11 = c(0, 32307.69231, 0, 0, 0, 5435.897436, 0, 0, 564.1025641,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128.2051282, 0, 0,
820.5128205, 230.7692308, 0, 0, 0, 25.64102564, 0, 21820.51282,
0, 0, 76.92307692, 461.5384615, 76.92307692, 1025.641026,
0), BT_B_13 = c(0, 217000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 142.8571429, 0, 142.8571429, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 5571.428571, 0, 714.2857143, 0, 0, 0, 0, 0,
16857.14286, 0), BT_B_24 = c(0, 121333.3333, 0, 0, 0, 666.6666667,
0, 0, 0, 200, 0, 0, 0, 0, 66.66666667, 0, 0, 0, 0, 66.66666667,
0, 133.3333333, 0, 0, 0, 2200, 0, 0, 0, 0, 133.3333333, 0,
74666.66667, 0, 0, 0, 400, 0, 12800, 0), BT_B_26 = c(0, 237285.7143,
0, 0, 0, 0, 0, 0, 0, 142.8571429, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 428.5714286, 285.7142857, 0, 0, 0, 1000,
0, 275000, 0, 0, 0, 0, 142.8571429, 6428.571429, 0), BT_B_34 = c(0,
24697.67442, 46.51162791, 0, 0, 325.5813953, 0, 0, 46.51162791,
139.5348837, 0, 69.76744186, 0, 93.02325581, 0, 0, 0, 581.3953488,
883.7209302, 23.25581395, 0, 186.0465116, 46.51162791, 0,
0, 465.1162791, 395.3488372, 0, 0, 0, 4720.930233, 0, 10093.02326,
69.76744186, 0, 23.25581395, 255.8139535, 767.4418605, 2139.534884,
0), BT_B_37 = c(0L, 53240L, 40L, 120L, 0L, 1840L, 0L, 40L,
80L, 280L, 40L, 40L, 40L, 120L, 0L, 0L, 0L, 760L, 2560L,
200L, 0L, 560L, 120L, 40L, 0L, 1160L, 560L, 0L, 40L, 0L,
8240L, 0L, 18800L, 80L, 0L, 0L, 240L, 1680L, 3680L, 0L),
BT_B_42 = c(0, 9164.179104, 0, 0, 0, 4.975124378, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1119.402985, 0, 0, 0, 4.975124378, 0, 990.0497512,
0), BT_B_45 = c(0, 14133.33333, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 33.33333333, 0, 0, 50, 0, 0, 0, 0, 0, 0, 266.6666667,
0, 0, 0, 1133.333333, 0, 19550, 0, 0, 283.3333333, 750, 366.6666667,
1033.333333, 0), BT_C_12 = c(0, 80, 0, 0, 160, 373.3333333,
0, 13.33333333, 0, 0, 13.33333333, 133.3333333, 240, 120,
0, 0, 0, 53.33333333, 20426.66667, 0, 0, 146.6666667, 160,
0, 1093.333333, 26800, 2133.333333, 0, 0, 53.33333333, 21306.66667,
0, 40, 0, 0, 0, 266.6666667, 22320, 26.66666667, 0), BT_C_27 = c(0,
125, 0, 6916.666667, 0, 3208.333333, 7416.666667, 500, 583.3333333,
0, 0, 0, 125, 0, 83.33333333, 0, 41.66666667, 916.6666667,
416.6666667, 41.66666667, 0, 125, 3250, 0, 0, 125, 41.66666667,
0, 0, 0, 541.6666667, 0, 333.3333333, 0, 0, 125, 5750, 125,
41.66666667, 0), BT_C_45 = c(114.2857143, 0, 0, 2900, 0,
2828.571429, 485.7142857, 1414.285714, 57.14285714, 0, 0,
0, 0, 0, 200, 0, 0, 571.4285714, 0, 57.14285714, 0, 0, 2514.285714,
0, 0, 0, 0, 0, 0, 0, 28.57142857, 0, 0, 0, 57.14285714, 0,
11157.14286, 0, 0, 0), BT_C_47 = c(0, 0, 0, 0, 0, 52.63157895,
3263.157895, 0, 0, 0, 0, 157.8947368, 105.2631579, 3684.210526,
52.63157895, 0, 105.2631579, 6736.842105, 263.1578947, 0,
0, 894.7368421, 52.63157895, 0, 52.63157895, 894.7368421,
157.8947368, 0, 0, 578.9473684, 50578.94737, 0, 0, 0, 0,
0, 0, 13368.42105, 894.7368421, 0), BT_D_13 = c(0, 0, 0,
0, 0, 378.3783784, 0, 0, 0, 148.6486486, 0, 2040.540541,
0, 81.08108108, 54.05405405, 0, 0, 94.59459459, 7162.162162,
0, 0, 1743.243243, 40.54054054, 148.6486486, 27.02702703,
5121.621622, 716.2162162, 0, 0, 13.51351351, 6851.351351,
0, 10162.16216, 0, 0, 0, 13.51351351, 8283.783784, 864.8648649,
4770.27027), BT_D_22 = c(0, 2346.153846, 0, 76.92307692,
0, 29884.61538, 0, 115.3846154, 0, 0, 500, 0, 0, 0, 0, 0,
0, 10846.15385, 2884.615385, 0, 0, 38.46153846, 307.6923077,
38.46153846, 0, 4461.538462, 76.92307692, 0, 0, 38.46153846,
8346.153846, 0, 12153.84615, 38.46153846, 0, 0, 769.2307692,
230.7692308, 730.7692308, 38.46153846), BT_D_24 = c(12.77955272,
460.0638978, 0, 0, 0, 0, 0, 0, 0, 0, 0, 405.7507987, 0, 0,
6.389776358, 0, 0, 0, 2677.316294, 0, 0, 0, 207.6677316,
175.7188498, 0, 1658.146965, 25.55910543, 0, 0, 0, 2444.089457,
0, 1610.223642, 0, 0, 0, 19.16932907, 146.9648562, 1773.162939,
916.9329073), BT_D_42 = c(29.41176471, 88.23529412, 29.41176471,
617.6470588, 0, 14352.94118, 0, 529.4117647, 147.0588235,
0, 147.0588235, 29.41176471, 29.41176471, 352.9411765, 117.6470588,
0, 0, 2088.235294, 134176.4706, 0, 0, 58.82352941, 470.5882353,
0, 0, 20176.47059, 88.23529412, 0, 0, 29.41176471, 8794.117647,
0, 1647.058824, 0, 0, 117.6470588, 882.3529412, 4352.941176,
5970.588235, 88.23529412), BT_E_14 = c(0, 69.76744186, 23.25581395,
953.4883721, 0, 2581.395349, 0, 0, 0, 0, 0, 69.76744186,
0, 0, 139.5348837, 0, 0, 5465.116279, 2930.232558, 23.25581395,
0, 465.1162791, 162.7906977, 23.25581395, 0, 4209.302326,
0, 0, 186.0465116, 116.2790698, 395.3488372, 0, 465.1162791,
93.02325581, 23.25581395, 0, 418.6046512, 1627.906977, 279.0697674,
69.76744186), BT_E_25 = c(0, 68.96551724, 103.4482759, 2310.344828,
0, 5758.62069, 0, 0, 0, 0, 0, 68.96551724, 0, 0, 275.862069,
0, 0, 8827.586207, 1827.586207, 0, 0, 241.3793103, 241.3793103,
0, 0, 9275.862069, 68.96551724, 0, 68.96551724, 103.4482759,
517.2413793, 0, 137.9310345, 34.48275862, 103.4482759, 0,
1068.965517, 3241.37931, 206.8965517, 34.48275862), BT_E_35 = c(0,
0, 0, 0, 0, 333.3333333, 0, 222.2222222, 0, 0, 0, 0, 0, 0,
0, 0, 888.8888889, 0, 222.2222222, 0, 111.1111111, 6111.111111,
111.1111111, 0, 0, 111.1111111, 35888.88889, 0, 0, 0, 540666.6667,
0, 111.1111111, 0, 0, 0, 0, 294222.2222, 111.1111111, 0),
BT_E_42 = c(0, 102.5641026, 1333.333333, 435.8974359, 0,
128.2051282, 0, 51.28205128, 0, 0, 0, 25.64102564, 230.7692308,
0, 0, 0, 0, 102.5641026, 2461.538462, 0, 25.64102564, 256.4102564,
0, 0, 0, 461.5384615, 51.28205128, 0, 0, 153.8461538, 1025.641026,
0, 820.5128205, 0, 0, 0, 153.8461538, 692.3076923, 333.3333333,
205.1282051)), class = "data.frame", row.names = c("Actinomyces_sp._oral_taxon_897",
"Aggregatibacter_actinomycetemcomitans", "Aggregatibacter_aphrophilus",
"Atopobium_parvulum", "Campylobacter_gracilis", "Candidatus_Saccharibacteria",
"Capnocytophaga_gingivalis", "Capnocytophaga_haemolytica", "Capnocytophaga_leadbetteri",
"Capnocytophaga_sp._ChDC_OS43", "Capnocytophaga_sputigena", "Cardiobacterium_hominis",
"Corynebacterium_kroppenstedtii", "Desulfomicrobium_orale", "Dialister_pneumosintes",
"Eikenella_corrodens", "Enterobacter_cloacae", "Filifactor_alocis",
"Fusobacterium_nucleatum", "Lactobacillus_jensenii", "Lautropia_mirabilis",
"Leptotrichia_buccalis", "Leptotrichia_sp._oral_taxon_212", "Leptotrichia_sp._oral_taxon_498",
"Olsenella_sp._oral_taxon_807", "Parvimonas_micra", "Porphyromonas_asaccharolytica",
"Porphyromonas_gingivalis", "Prevotella_dentalis", "Prevotella_denticola",
"Prevotella_intermedia", "Prevotella_melaninogenica", "Prevotella_oris",
"Prevotella_sp._oral_taxon_299", "Pseudopropionibacterium_propionicum",
"Rothia_mucilaginosa", "Selenomonas_sputigena", "Tannerella_forsythia",
"Tannerella_sp._oral_taxon_HOT-286", "Treponema_sp._OMZ_838"))
The output could be something like this:
I hope the community can help me solve this problem.
I suggest you read the vignette of e.g. PCAtools. It should contain everything you need. By the way @all, this is R `dput` output, so you can copy/paste it into R directly.
Thank you ATpoint! PCAtools is a great option for solving the problem (I had already found it). However, I have trouble transferring the given example to my data. The example in the vignette is about gene expression analysis. It is great, but it is hard for me to adapt the code to the given metadata to make the PCA biplot. (Sorry, I'm new to writing R code.)
OP's data has many organisms to plot, unlike the limited number in the shared image. Please follow ATpoint's instructions. You would get something like this:
and a scree plot like this:
Thanks, cpad0112. Can you share with me the R code you used to produce these plots?