森林圖是以統計指標和統計分析方法為基礎,用數值運算結果繪制出的圖型。它在平面直角坐標系中,以一條垂直的無效線 (橫坐標刻度為1或0)為中心,用平行于橫軸的多條線描述每個被納入研究的效應量和可信區間,用一個棱形 (或其它圖形)描述多個研究合并的效應量及可信區間,簡單直觀地描述了Meta分析的統計結果。森林圖是展示效應量及其可信區間的重要可視化工具,下面我們詳細介紹如何使用ggplot2包創建自定義的森林圖。
####################################森林圖繪制#################################### # ====================== # 第一部分:準備工作 # ====================== # 清除工作環境 rm(list = ls()) options(stringsAsFactors = FALSE) # 加載必要的包 if(!require(ggplot2)) install.packages("ggplot2")
## 載入需要的程序包:ggplot2
## Warning: 程序包'ggplot2'是用R版本4.4.3 來建造的
if(!require(dplyr)) install.packages("tidyverse")
## 載入需要的程序包:dplyr
## ## 載入程序包:'dplyr'
## The following objects are masked from 'package:stats': ## ## filter, lag
## The following objects are masked from 'package:base': ## ## intersect, setdiff, setequal, union
library(ggplot2) library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ── ## ? forcats 1.0.0 ? stringr 1.5.1 ## ? lubridate 1.9.4 ? tibble 3.2.1 ## ? purrr 1.0.2 ? tidyr 1.3.1 ## ? readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ── ## ? dplyr::filter() masks stats::filter() ## ? dplyr::lag() masks stats::lag() ## ? Use the conflicted package ( ) to force all conflicts to become errors
# ====================== # 第二部分:創建模擬數據集 # ====================== # 創建模擬數據 set.seed(123) forest_data <- data.frame( Varnames = paste("Variable", LETTERS[1:10]), Factor = rep(c("Group1", "Group2"), each = 5), OR = c(1.2, 1.5, 0.8, 1.1, 0.9, 1.8, 2.1, 1.4, 0.7, 1.3), Lower = c(0.9, 1.1, 0.6, 0.8, 0.7, 1.3, 1.5, 1.0, 0.5, 0.9), Upper = c(1.5, 1.9, 1.0, 1.4, 1.1, 2.3, 2.7, 1.8, 0.9, 1.7), Sample = sample(50:200, 10) ) %>% mutate(Varnames = factor(Varnames, levels = Varnames)) # 保持原始順序 # 創建注釋數據 annotation_data <- data.frame( x = c(0.6, 2.2), y = c(3, 8), label = c("Protective Effect", "Risk Effect") ) # ====================== # 第三部分:基礎森林圖 # ====================== basic_forest <- ggplot(forest_data, aes(x = OR, y = Varnames, color = Factor)) + geom_point(size = 3) + geom_errorbarh(aes(xmax = Upper, xmin = Lower), height = 0.2) + geom_vline(xintercept = 1, linetype = "dashed", color = "red", size = 0.8) + scale_x_continuous(limits = c(0.5, 3), breaks = seq(0.5, 3, 0.5)) + labs(x = "Odds Ratio (95% CI)", y = "", title = "Basic Forest Plot") + theme_bw() + theme(legend.position = "top", plot.title = element_text(hjust = 0.5, face = "bold"), axis.text.y = element_text(face = "bold"))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0. ## ? Please use `linewidth` instead. ## This warning is displayed once every 8 hours. ## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was ## generated.
print(basic_forest)
# ggsave("basic_forest.png", width = 8, height = 6, dpi = 300) # ====================== # 第四部分:添加注釋的森林圖 # ====================== annotated_forest <- ggplot(forest_data, aes(x = OR, y = Varnames)) + geom_point(aes(color = Factor), size = 3) + geom_errorbarh(aes(xmax = Upper, xmin = Lower), height = 0.2) + geom_vline(xintercept = 1, linetype = "dashed", color = "red", size = 0.8) + scale_x_continuous(limits = c(0.5, 3), breaks = seq(0.5, 3, 0.5)) + labs(x = "Odds Ratio (95% CI)", y = "", title = "Annotated Forest Plot") + theme_bw() + theme(legend.position = "top", plot.title = element_text(hjust = 0.5, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.y = element_text(face = "bold")) + geom_text(data = annotation_data, aes(x = x, y = y, label = label), color = "blue", fontface = "bold", size = 4) + annotate("text", x = 1, y = 10.5, label = "Reference Line (OR = 1)", color = "red", vjust = -0.5) print(annotated_forest)
# ggsave("annotated_forest.png", width = 9, height = 6, dpi = 300) # ====================== # 第五部分:點大小表示樣本量的森林圖 # ====================== size_forest <- ggplot(forest_data, aes(x = OR, y = Varnames)) + geom_point(aes(size = Sample, color = Factor), alpha = 0.7) + geom_errorbarh(aes(xmax = Upper, xmin = Lower), height = 0.2) + geom_vline(xintercept = 1, linetype = "dashed", color = "red", size = 0.8) + scale_x_continuous(limits = c(0.5, 3), breaks = seq(0.5, 3, 0.5)) + scale_size_continuous(range = c(3, 10), breaks = c(50, 100, 150, 200), labels = c("50", "100", "150", "200")) + labs(x = "Odds Ratio (95% CI)", y = "", title = "Forest Plot with Sample Size", size = "Sample Size", color = "Group") + theme_bw() + theme(legend.position = "right", plot.title = element_text(hjust = 0.5, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.y = element_text(face = "bold"), legend.box = "vertical") print(size_forest)
# ggsave("size_forest.png", width = 10, height = 6, dpi = 300) # ====================== # 第六部分:高級定制森林圖 # ====================== advanced_forest <- ggplot(forest_data, aes(x = OR, y = Varnames)) + # 添加背景區域 annotate("rect", xmin = -Inf, xmax = 1, ymin = -Inf, ymax = Inf, fill = "lightblue", alpha = 0.05) + annotate("rect", xmin = 1, xmax = Inf, ymin = -Inf, ymax = Inf, fill = "pink", alpha = 0.05) + # 添加數據點 geom_point(aes(size = Sample, color = Factor), shape = 18) + # 添加誤差線 geom_errorbarh(aes(xmax = Upper, xmin = Lower), height = 0.1, size = 0.8) + # 添加參考線 geom_vline(xintercept = 1, linetype = "dashed", color = "black", size = 1) + # 坐標軸設置 scale_x_continuous(limits = c(0.4, 3), breaks = seq(0.5, 3, 0.5)) + scale_color_manual(values = c("Group1" = "#1f78b4", "Group2" = "#e31a1c")) + scale_size_continuous(range = c(3, 10)) + # 標簽和標題 labs(x = "\nOdds Ratio with 95% Confidence Interval", y = "", title = "Advanced Customized Forest Plot", subtitle = "Effect sizes with sample size indicators", caption = "Reference line at OR = 1.0\nDashed lines represent 95% CIs", color = "Study Group", size = "Sample Size") + # 主題設置 theme_minimal() + theme( plot.title = element_text(face = "bold", hjust = 0.5, size = 14), plot.subtitle = element_text(hjust = 0.5, size = 12), axis.text.y = element_text(face = "bold", size = 10), axis.text.x = element_text(size = 10), axis.title.x = element_text(face = "bold", size = 11), legend.position = "bottom", legend.box = "horizontal", panel.grid.major = element_line(color = "gray90"), panel.grid.minor = element_blank(), plot.background = element_rect(fill = "white", color = NA) ) + # 添加注釋 geom_text(data = annotation_data, aes(x = x, y = y, label = label), color = c("#1f78b4", "#e31a1c"), fontface = "bold", size = 4.5) + # 添加箭頭指示 annotate("segment", x = 0.6, xend = 0.8, y = 3, yend = 3, arrow = arrow(length = unit(0.2, "cm")), color = "#1f78b4") + annotate("segment", x = 2.2, xend = 2.0, y = 8, yend = 8, arrow = arrow(length = unit(0.2, "cm")), color = "#e31a1c") print(advanced_forest)
# ggsave("advanced_forest.png", width = 10, height = 7, dpi = 300) # ====================== # 第七部分:數據說明 # ====================== cat(" 主數據結構說明: - Varnames: 變量名稱 (在y軸顯示) - Factor: 分組變量 (用于顏色分組) - OR: 效應量 (比值比) - Lower: 置信區間下限 - Upper: 置信區間上限 - Sample: 樣本量 (可用于點的大小) 注釋數據結構說明: - x: 注釋文本的x坐標位置 - y: 注釋文本的y坐標位置 - label: 要顯示的文本內容 ")
## ## 主數據結構說明: ## - Varnames: 變量名稱 (在y軸顯示) ## - Factor: 分組變量 (用于顏色分組) ## - OR: 效應量 (比值比) ## - Lower: 置信區間下限 ## - Upper: 置信區間上限 ## - Sample: 樣本量 (可用于點的大小) ## ## 注釋數據結構說明: ## - x: 注釋文本的x坐標位置 ## - y: 注釋文本的y坐標位置 ## - label: 要顯示的文本內容
# ====================== # 第八部分:總結 # ====================== cat(" 使用說明: 1. 基礎森林圖展示了最基本的森林圖元素 2. 注釋森林圖添加了文字說明和標記 3. 樣本量森林圖通過點大小反映樣本量信息 4. 高級森林圖展示了更多定制化選項 使用建議: - 根據實際數據調整坐標軸范圍 - 選擇合適的顏色方案 - 調整圖例位置和樣式以適應不同需求 - 使用ggsave()保存高質量圖片 ")
## ## 使用說明: ## 1. 基礎森林圖展示了最基本的森林圖元素 ## 2. 注釋森林圖添加了文字說明和標記 ## 3. 樣本量森林圖通過點大小反映樣本量信息 ## 4. 高級森林圖展示了更多定制化選項 ## ## 使用建議: ## - 根據實際數據調整坐標軸范圍 ## - 選擇合適的顏色方案 ## - 調整圖例位置和樣式以適應不同需求 ## - 使用ggsave()保存高質量圖片
#########文獻森林圖1,飲料消費與遺傳風險的森林圖############## # 創建數據框 beverage_data <- data.frame( GeneticRisk = rep(c("Low", "Intermediate", "High"), each = 12), Consumption = rep(rep(c("0/day", ">0-1/day", ">1-2/day", ">2/day"), 3), each = 3), BeverageType = rep(c("SSBs", "ASBs", "PJs"), 12), HR = c(1.00, 0.97, 0.87, 1.48, 1.00, 0.82, 1.28, 1.50, 1.00, 0.85, 1.06, 1.12, 1.00, 0.97, 1.03, 1.17, 1.00, 1.02, 1.20, 1.21, 1.00, 0.93, 1.01, 0.96, 1.00, 0.99, 1.03, 1.15, 1.00, 1.08, 1.16, 1.19, 1.00, 0.89, 0.92, 0.82), LowerCI = c(NA, 0.84, 0.69, 1.14, NA, 0.67, 1.00, 1.08, NA, 0.75, 0.88, 0.83, NA, 0.90, 0.92, 1.00, NA, 0.92, 1.05, 1.01, NA, 0.87, 0.91, 0.80, NA, 0.92, 0.91, 0.99, NA, 0.98, 1.00, 0.99, NA, 0.84, 0.82, 0.68), UpperCI = c(NA, 1.12, 1.09, 1.91, NA, 0.99, 1.64, 2.08, NA, 0.96, 1.27, 1.53, NA, 1.05, 1.15, 1.36, NA, 1.12, 1.38, 1.45, NA, 0.99, 1.12, 1.14, NA, 1.08, 1.16, 1.35, NA, 1.19, 1.33, 1.44, NA, 0.96, 1.02, 0.99) ) # 繪制森林圖 ggplot(beverage_data, aes(x = HR, y = interaction(Consumption, BeverageType), color = BeverageType)) + geom_point(size = 3, position = position_dodge(width = 0.5)) + geom_errorbarh(aes(xmin = LowerCI, xmax = UpperCI), height = 0.2, position = position_dodge(width = 0.5)) + geom_vline(xintercept = 1, linetype = "dashed") + facet_grid(GeneticRisk ~ ., scales = "free_y", space = "free_y") + scale_color_manual(values = c("SSBs" = "#E41A1C", "ASBs" = "#377EB8", "PJs" = "#4DAF4A")) + labs(x = "Hazard Ratio (95% CI)", y = "Consumption Level", title = "Association between beverage consumption and risk by genetic risk level") + theme_minimal() + theme(panel.grid.major.y = element_blank(), panel.grid.minor.y = element_blank(), strip.text = element_text(face = "bold"))
## Warning: Removed 9 rows containing missing values or values outside the scale range ## (`geom_errorbarh()`).
#########文獻森林圖2,亞組分析的森林圖############## subgroup_data <- data.frame( Subgroup = c("All patients", "Age ≤72 yr", "Age >72 yr", "Female", "Male", "Asian", "Black", "White", "Other", "Europe or Saudi Arabia", "Asia", "Latin America", "North America", "NYHA class II", "NYHA class III or IV", "LVEF ≤49%", "LVEF 50-59%", "LVEF ≥60%", "NT-proBNP ≤1011 pg/ml", "NT-proBNP >1011 pg/ml", "Not recently hospitalized", "Recently hospitalized", "No diabetes", "Diabetes", "No atrial fibrillation", "Atrial fibrillation", "BMI <30", "BMI ≥30", "eGFR <60 ml/min", "eGFR ≥60 ml/min", "SBP ≤128 mm Hg", "SBP >128 mm Hg", "No previous LVEF ≤40%", "Previous LVEF ≤40%"), HR = c(0.82, 0.82, 0.81, 0.81, 0.81, 0.81, 0.83, 0.78, 0.80, 0.84, 0.82, 0.82, 0.71, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81), # 補充4個值使總數達到34 LowerCI = c(0.73, 0.69, 0.67, 0.69, 0.67, 0.67, 0.46, 0.57, 0.65, 0.68, 0.69, 0.72, 0.60, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69), # 補充4個值 UpperCI = c(0.92, 0.96, 0.97, 0.96, 0.97, 0.97, 1.48, 1.07, 0.98, 1.02, 0.96, 0.94, 0.85, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96), # 補充4個值 EventsDapa = c(512, 247, 265, 195, 317, 97, 21, 372, 22, 261, 92, 70, 89, 331, 181, 207, 174, 131, 173, 339, 419, 93, 242, 270, 285, 227, 275, 236, 289, 223, 285, 227, 275, 236), # 補充4個值 TotalDapa = c(3131, 1545, 1586, 1364, 1767, 630, 81, 2214, 206, 1494, 607, 602, 428, 2314, 817, 1067, 1133, 931, 1555, 1576, 2803, 328, 1730, 1401, 1803, 1327, 1734, 1395, 1516, 1615, 1803, 1327, 1734, 1395), # 補充4個值 EventsPlac = c(610, 306, 304, 243, 367, 106, 19, 461, 24, 309, 103, 87, 111, 411, 198, 229, 211, 170, 208, 402, 497, 113, 293, 317, 339, 271, 302, 308, 355, 255, 339, 271, 302, 308), # 補充4個值 TotalPlac = c(3132, 1604, 1528, 1383, 1749, 644, 78, 2225, 185, 1511, 619, 579, 423, 2399, 732, 1049, 1123, 960, 1578, 1553, 2806, 326, 1727, 1405, 1814, 1317, 1736, 1392, 1554, 1577, 1814, 1317, 1736, 1392) # 補充4個值 ) # 檢查數據結構 str(subgroup_data)
## 'data.frame': 34 obs. of 8 variables: ## $ Subgroup : chr "All patients" "Age ≤72 yr" "Age >72 yr" "Female" ... ## $ HR : num 0.82 0.82 0.81 0.81 0.81 0.81 0.83 0.78 0.8 0.84 ... ## $ LowerCI : num 0.73 0.69 0.67 0.69 0.67 0.67 0.46 0.57 0.65 0.68 ... ## $ UpperCI : num 0.92 0.96 0.97 0.96 0.97 0.97 1.48 1.07 0.98 1.02 ... ## $ EventsDapa: num 512 247 265 195 317 97 21 372 22 261 ... ## $ TotalDapa : num 3131 1545 1586 1364 1767 ... ## $ EventsPlac: num 610 306 304 243 367 106 19 461 24 309 ... ## $ TotalPlac : num 3132 1604 1528 1383 1749 ...
head(subgroup_data)
## Subgroup HR LowerCI UpperCI EventsDapa TotalDapa EventsPlac TotalPlac ## 1 All patients 0.82 0.73 0.92 512 3131 610 3132 ## 2 Age ≤72 yr 0.82 0.69 0.96 247 1545 306 1604 ## 3 Age >72 yr 0.81 0.67 0.97 265 1586 304 1528 ## 4 Female 0.81 0.69 0.96 195 1364 243 1383 ## 5 Male 0.81 0.67 0.97 317 1767 367 1749 ## 6 Asian 0.81 0.67 0.97 97 630 106 644
# 繪制森林圖 ggplot(subgroup_data, aes(x = HR, y = reorder(Subgroup, desc(Subgroup)))) + geom_point(size = 2) + geom_errorbarh(aes(xmin = LowerCI, xmax = UpperCI), height = 0.2) + geom_vline(xintercept = 1, linetype = "dashed") + geom_text(aes(label = paste0(EventsDapa, "/", TotalDapa, " vs ", EventsPlac, "/", TotalPlac)), x = 1.5, hjust = 0, size = 3) + scale_x_continuous(limits = c(0.4, 1.6), breaks = seq(0.4, 1.6, by = 0.2)) + labs(x = "Hazard Ratio (95% CI)", y = "", title = "Subgroup Analysis of Treatment Effect") + theme_minimal() + theme(panel.grid.major.y = element_blank(), panel.grid.minor.y = element_blank())
#################文獻森林圖3,系統發育多樣性的森林圖################## # 創建數據框 phylogenetic_data <- data.frame( Group = c("Acidobacteria", "Actinobacteria", "Bacteroidetes", "Chlamydiae", "Chloroflexi", "Firmicutes", "Gemmatimonadetes", "Nitrospirae", "Planctomycetes", "α-Proteobacteria", "β-Proteobacteria", "γ-Proteobacteria", "δ-Proteobacteria", "Verrucomicrobia", "Ascomycota", "Basidiomycota", "Mortierellomycota", "AMF", "Plant pathogen", "Saprotroph", "Cercozoa", "Ciliophora", "Conosa", "Lobosa", "Ochrophyta", "Consumer", "Phototroph", "Parasite"), EffectSize = c(0.2, -0.3, 0.1, -0.4, 0.3, -0.2, 0.5, -0.1, 0.4, -0.3, 0.2, -0.5, 0.1, -0.2, 0.3, -0.4, 0.2, -0.1, 0.4, -0.3, 0.1, -0.2, 0.3, -0.4, 0.2, -0.1, 0.5, -0.3), Significance = c("P<0.05", "P≥0.05", "P<0.05", "P≥0.05", "P<0.05", "P≥0.05", "P<0.05", "P≥0.05", "P<0.05", "P≥0.05", "P<0.05", "P≥0.05", "P<0.05", "P≥0.05", "P<0.05", "P≥0.05", "P<0.05", "P≥0.05", "P<0.05", "P≥0.05", "P<0.05", "P≥0.05", "P<0.05", "P≥0.05", "P<0.05", "P≥0.05", "P<0.05", "P≥0.05") ) # 繪制森林圖 ggplot(phylogenetic_data, aes(x = EffectSize, y = reorder(Group, EffectSize), fill = Significance)) + geom_col(aes(fill = Significance), width = 0.7) + geom_vline(xintercept = 0, linetype = "solid") + scale_fill_manual(values = c("P<0.05" = "#E41A1C", "P≥0.05" = "#377EB8")) + scale_x_continuous(limits = c(-0.6, 0.6), breaks = seq(-0.6, 0.6, by = 0.2)) + labs(x = "Warming Effect Size", y = "Phylogenetic Group", title = "Phylogenetic Diversity and Relative Abundance") + theme_minimal() + theme(panel.grid.major.y = element_blank(), panel.grid.minor.y = element_blank(), legend.position = "top")
#################文獻森林圖4,臨床特征的森林圖################## # 創建數據框 clinical_data <- data.frame( Feature = c("Age", "IDH_status (Mutant)", "MGMT_promoter_status (Wildtype)", "MGMT_promoter_status (Methylated)", "MGMT_promoter_status (Un-methylated)", "Stage (WHO II)", "Stage (WHO III)", "Stage (WHO IV)"), HR = c(1.0, 1.0, 3.0, 1.0, 1.3, 2.2, 2.2, 4.1), LowerCI = c(1.02, NA, 1.92, NA, 0.95, 1.44, 1.44, 2.39), UpperCI = c(1.0, NA, 4.7, NA, 1.8, 3.3, 3.3, 6.9), N = c(674, 428, 237, 477, 163, 248, 265, 160) ) # 繪制森林圖 ggplot(clinical_data, aes(x = HR, y = reorder(Feature, desc(Feature)))) + geom_point(size = 3) + geom_errorbarh(aes(xmin = LowerCI, xmax = UpperCI), height = 0.2) + geom_vline(xintercept = 1, linetype = "dashed") + geom_text(aes(label = paste0("N=", N)), x = 5, hjust = 0, size = 3.5) + scale_x_log10(limits = c(0.5, 10), breaks = c(0.5, 1, 2, 3, 4, 5, 10)) + labs(x = "Hazard Ratio (95% CI)", y = "", title = "Clinical Features and Hazard Ratios", subtitle = "Events: 214; Global p-value (Log-Rank): 1.0821e-67\nAIC: 2020.23; Concordance Index: 0.86") + theme_minimal() + theme(panel.grid.major.y = element_blank(), panel.grid.minor.y = element_blank(), plot.subtitle = element_text(size = 10))
## Warning: Removed 2 rows containing missing values or values outside the scale range ## (`geom_errorbarh()`).
以上代碼完全是不斷向DeepSeek提問、不斷糾正生成,供大家參考!
特別聲明:以上內容(如有圖片或視頻亦包括在內)為自媒體平臺“網易號”用戶上傳并發布,本平臺僅提供信息存儲服務。
Notice: The content above (including the pictures and videos if any) is uploaded and posted by a user of NetEase Hao, which is a social media platform and only provides information storage services.