網易首頁 > 網易號 > 正文申請入駐

DeepSeek！ggplot2絲滑繪制森林圖！

2025-06-07 15:48:50　來源: 芒果師兄

上海舉報

分享至

森林圖是以統計指標和統計分析方法為基礎，用數值運算結果繪制出的圖型。它在平面直角坐標系中，以一條垂直的無效線 (橫坐標刻度為1或0)為中心，用平行于橫軸的多條線描述每個被納入研究的效應量和可信區間，用一個棱形 (或其它圖形)描述多個研究合并的效應量及可信區間，簡單直觀地描述了Meta分析的統計結果。森林圖是展示效應量及其可信區間的重要可視化工具，下面我們詳細介紹如何使用ggplot2包創建自定義的森林圖。

####################################森林圖繪制#################################### # ====================== # 第一部分：準備工作 # ====================== # 清除工作環境 rm(list = ls()) options(stringsAsFactors = FALSE) # 加載必要的包 if(!require(ggplot2)) install.packages("ggplot2")

## 載入需要的程序包：ggplot2

## Warning: 程序包'ggplot2'是用R版本4.4.3 來建造的

if(!require(dplyr)) install.packages("tidyverse")

## 載入需要的程序包：dplyr

##  ## 載入程序包：'dplyr'

## The following objects are masked from 'package:stats': ##  ##     filter, lag

## The following objects are masked from 'package:base': ##  ##     intersect, setdiff, setequal, union

library(ggplot2) library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ── ## ? forcats   1.0.0     ? stringr   1.5.1 ## ? lubridate 1.9.4     ? tibble    3.2.1 ## ? purrr     1.0.2     ? tidyr     1.3.1 ## ? readr     2.1.5

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ── ## ? dplyr::filter() masks stats::filter() ## ? dplyr::lag()    masks stats::lag() ## ? Use the conflicted package ( ) to force all conflicts to become errors

# ====================== # 第二部分：創建模擬數據集 # ====================== # 創建模擬數據 set.seed(123) forest_data <- data.frame(   Varnames = paste("Variable", LETTERS[1:10]),   Factor = rep(c("Group1", "Group2"), each = 5),   OR = c(1.2, 1.5, 0.8, 1.1, 0.9, 1.8, 2.1, 1.4, 0.7, 1.3),   Lower = c(0.9, 1.1, 0.6, 0.8, 0.7, 1.3, 1.5, 1.0, 0.5, 0.9),   Upper = c(1.5, 1.9, 1.0, 1.4, 1.1, 2.3, 2.7, 1.8, 0.9, 1.7),   Sample = sample(50:200, 10) ) %>%   mutate(Varnames = factor(Varnames, levels = Varnames))  # 保持原始順序 # 創建注釋數據 annotation_data <- data.frame(   x = c(0.6, 2.2),   y = c(3, 8),   label = c("Protective Effect", "Risk Effect") ) # ====================== # 第三部分：基礎森林圖 # ====================== basic_forest <- ggplot(forest_data, aes(x = OR, y = Varnames, color = Factor)) +   geom_point(size = 3) +   geom_errorbarh(aes(xmax = Upper, xmin = Lower), height = 0.2) +   geom_vline(xintercept = 1, linetype = "dashed", color = "red", size = 0.8) +   scale_x_continuous(limits = c(0.5, 3), breaks = seq(0.5, 3, 0.5)) +   labs(x = "Odds Ratio (95% CI)", y = "",         title = "Basic Forest Plot") +   theme_bw() +   theme(legend.position = "top",         plot.title = element_text(hjust = 0.5, face = "bold"),         axis.text.y = element_text(face = "bold"))

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0. ## ? Please use `linewidth` instead. ## This warning is displayed once every 8 hours. ## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was ## generated.

print(basic_forest)

# ggsave("basic_forest.png", width = 8, height = 6, dpi = 300) # ====================== # 第四部分：添加注釋的森林圖 # ====================== annotated_forest <- ggplot(forest_data, aes(x = OR, y = Varnames)) +   geom_point(aes(color = Factor), size = 3) +   geom_errorbarh(aes(xmax = Upper, xmin = Lower), height = 0.2) +   geom_vline(xintercept = 1, linetype = "dashed", color = "red", size = 0.8) +   scale_x_continuous(limits = c(0.5, 3), breaks = seq(0.5, 3, 0.5)) +   labs(x = "Odds Ratio (95% CI)", y = "",         title = "Annotated Forest Plot") +   theme_bw() +   theme(legend.position = "top",         plot.title = element_text(hjust = 0.5, face = "bold"),         panel.grid.major = element_blank(),         panel.grid.minor = element_blank(),         axis.text.y = element_text(face = "bold")) +   geom_text(data = annotation_data,              aes(x = x, y = y, label = label),             color = "blue", fontface = "bold", size = 4) +   annotate("text", x = 1, y = 10.5, label = "Reference Line (OR = 1)",             color = "red", vjust = -0.5) print(annotated_forest)

# ggsave("annotated_forest.png", width = 9, height = 6, dpi = 300) # ====================== # 第五部分：點大小表示樣本量的森林圖 # ====================== size_forest <- ggplot(forest_data, aes(x = OR, y = Varnames)) +   geom_point(aes(size = Sample, color = Factor), alpha = 0.7) +   geom_errorbarh(aes(xmax = Upper, xmin = Lower), height = 0.2) +   geom_vline(xintercept = 1, linetype = "dashed", color = "red", size = 0.8) +   scale_x_continuous(limits = c(0.5, 3), breaks = seq(0.5, 3, 0.5)) +   scale_size_continuous(range = c(3, 10),                          breaks = c(50, 100, 150, 200),                         labels = c("50", "100", "150", "200")) +   labs(x = "Odds Ratio (95% CI)", y = "",         title = "Forest Plot with Sample Size",        size = "Sample Size", color = "Group") +   theme_bw() +   theme(legend.position = "right",         plot.title = element_text(hjust = 0.5, face = "bold"),         panel.grid.major = element_blank(),         panel.grid.minor = element_blank(),         axis.text.y = element_text(face = "bold"),         legend.box = "vertical") print(size_forest)

# ggsave("size_forest.png", width = 10, height = 6, dpi = 300) # ====================== # 第六部分：高級定制森林圖 # ====================== advanced_forest <- ggplot(forest_data, aes(x = OR, y = Varnames)) + # 添加背景區域   annotate("rect", xmin = -Inf, xmax = 1, ymin = -Inf, ymax = Inf,            fill = "lightblue", alpha = 0.05) +   annotate("rect", xmin = 1, xmax = Inf, ymin = -Inf, ymax = Inf,            fill = "pink", alpha = 0.05) + # 添加數據點   geom_point(aes(size = Sample, color = Factor), shape = 18) + # 添加誤差線   geom_errorbarh(aes(xmax = Upper, xmin = Lower), height = 0.1, size = 0.8) + # 添加參考線   geom_vline(xintercept = 1, linetype = "dashed", color = "black", size = 1) + # 坐標軸設置   scale_x_continuous(limits = c(0.4, 3), breaks = seq(0.5, 3, 0.5)) +   scale_color_manual(values = c("Group1" = "#1f78b4", "Group2" = "#e31a1c")) +   scale_size_continuous(range = c(3, 10)) + # 標簽和標題   labs(x = "\nOdds Ratio with 95% Confidence Interval", y = "",         title = "Advanced Customized Forest Plot",        subtitle = "Effect sizes with sample size indicators",        caption = "Reference line at OR = 1.0\nDashed lines represent 95% CIs",        color = "Study Group", size = "Sample Size") + # 主題設置   theme_minimal() +   theme(     plot.title = element_text(face = "bold", hjust = 0.5, size = 14),     plot.subtitle = element_text(hjust = 0.5, size = 12),     axis.text.y = element_text(face = "bold", size = 10),     axis.text.x = element_text(size = 10),     axis.title.x = element_text(face = "bold", size = 11),     legend.position = "bottom",     legend.box = "horizontal",     panel.grid.major = element_line(color = "gray90"),     panel.grid.minor = element_blank(),     plot.background = element_rect(fill = "white", color = NA)   ) + # 添加注釋   geom_text(data = annotation_data,              aes(x = x, y = y, label = label),             color = c("#1f78b4", "#e31a1c"),              fontface = "bold", size = 4.5) + # 添加箭頭指示   annotate("segment", x = 0.6, xend = 0.8, y = 3, yend = 3,             arrow = arrow(length = unit(0.2, "cm")), color = "#1f78b4") +   annotate("segment", x = 2.2, xend = 2.0, y = 8, yend = 8,             arrow = arrow(length = unit(0.2, "cm")), color = "#e31a1c") print(advanced_forest)

# ggsave("advanced_forest.png", width = 10, height = 7, dpi = 300) # ====================== # 第七部分：數據說明 # ====================== cat(" 主數據結構說明: - Varnames: 變量名稱 (在y軸顯示) - Factor: 分組變量 (用于顏色分組) - OR: 效應量 (比值比) - Lower: 置信區間下限 - Upper: 置信區間上限 - Sample: 樣本量 (可用于點的大小) 注釋數據結構說明: - x: 注釋文本的x坐標位置 - y: 注釋文本的y坐標位置 - label: 要顯示的文本內容 ")

##  ## 主數據結構說明: ## - Varnames: 變量名稱 (在y軸顯示) ## - Factor: 分組變量 (用于顏色分組) ## - OR: 效應量 (比值比) ## - Lower: 置信區間下限 ## - Upper: 置信區間上限 ## - Sample: 樣本量 (可用于點的大小) ##  ## 注釋數據結構說明: ## - x: 注釋文本的x坐標位置 ## - y: 注釋文本的y坐標位置 ## - label: 要顯示的文本內容

# ====================== # 第八部分：總結 # ====================== cat(" 使用說明: 1. 基礎森林圖展示了最基本的森林圖元素 2. 注釋森林圖添加了文字說明和標記 3. 樣本量森林圖通過點大小反映樣本量信息 4. 高級森林圖展示了更多定制化選項 使用建議: - 根據實際數據調整坐標軸范圍 - 選擇合適的顏色方案 - 調整圖例位置和樣式以適應不同需求 - 使用ggsave()保存高質量圖片 ")

##  ## 使用說明: ## 1. 基礎森林圖展示了最基本的森林圖元素 ## 2. 注釋森林圖添加了文字說明和標記 ## 3. 樣本量森林圖通過點大小反映樣本量信息 ## 4. 高級森林圖展示了更多定制化選項 ##  ## 使用建議: ## - 根據實際數據調整坐標軸范圍 ## - 選擇合適的顏色方案 ## - 調整圖例位置和樣式以適應不同需求 ## - 使用ggsave()保存高質量圖片

#########文獻森林圖1，飲料消費與遺傳風險的森林圖############## # 創建數據框 beverage_data <- data.frame(   GeneticRisk = rep(c("Low", "Intermediate", "High"), each = 12),   Consumption = rep(rep(c("0/day", ">0-1/day", ">1-2/day", ">2/day"), 3), each = 3),   BeverageType = rep(c("SSBs", "ASBs", "PJs"), 12),   HR = c(1.00, 0.97, 0.87, 1.48, 1.00, 0.82, 1.28, 1.50, 1.00, 0.85, 1.06, 1.12, 1.00, 0.97, 1.03, 1.17, 1.00, 1.02, 1.20, 1.21, 1.00, 0.93, 1.01, 0.96, 1.00, 0.99, 1.03, 1.15, 1.00, 1.08, 1.16, 1.19, 1.00, 0.89, 0.92, 0.82),   LowerCI = c(NA, 0.84, 0.69, 1.14, NA, 0.67, 1.00, 1.08, NA, 0.75, 0.88, 0.83, NA, 0.90, 0.92, 1.00, NA, 0.92, 1.05, 1.01, NA, 0.87, 0.91, 0.80, NA, 0.92, 0.91, 0.99, NA, 0.98, 1.00, 0.99, NA, 0.84, 0.82, 0.68),   UpperCI = c(NA, 1.12, 1.09, 1.91, NA, 0.99, 1.64, 2.08, NA, 0.96, 1.27, 1.53, NA, 1.05, 1.15, 1.36, NA, 1.12, 1.38, 1.45, NA, 0.99, 1.12, 1.14, NA, 1.08, 1.16, 1.35, NA, 1.19, 1.33, 1.44, NA, 0.96, 1.02, 0.99) ) # 繪制森林圖 ggplot(beverage_data, aes(x = HR, y = interaction(Consumption, BeverageType),                            color = BeverageType)) +   geom_point(size = 3, position = position_dodge(width = 0.5)) +   geom_errorbarh(aes(xmin = LowerCI, xmax = UpperCI),                   height = 0.2, position = position_dodge(width = 0.5)) +   geom_vline(xintercept = 1, linetype = "dashed") +   facet_grid(GeneticRisk ~ ., scales = "free_y", space = "free_y") +   scale_color_manual(values = c("SSBs" = "#E41A1C", "ASBs" = "#377EB8", "PJs" = "#4DAF4A")) +   labs(x = "Hazard Ratio (95% CI)", y = "Consumption Level",         title = "Association between beverage consumption and risk by genetic risk level") +   theme_minimal() +   theme(panel.grid.major.y = element_blank(),         panel.grid.minor.y = element_blank(),         strip.text = element_text(face = "bold"))

## Warning: Removed 9 rows containing missing values or values outside the scale range ## (`geom_errorbarh()`).

#########文獻森林圖2，亞組分析的森林圖############## subgroup_data <- data.frame(   Subgroup = c("All patients", "Age ≤72 yr", "Age >72 yr", "Female", "Male",  "Asian", "Black", "White", "Other", "Europe or Saudi Arabia",  "Asia", "Latin America", "North America", "NYHA class II",  "NYHA class III or IV", "LVEF ≤49%", "LVEF 50-59%", "LVEF ≥60%", "NT-proBNP ≤1011 pg/ml", "NT-proBNP >1011 pg/ml", "Not recently hospitalized", "Recently hospitalized", "No diabetes", "Diabetes", "No atrial fibrillation",  "Atrial fibrillation", "BMI <30", "BMI ≥30", "eGFR <60 ml/min", "eGFR ≥60 ml/min", "SBP ≤128 mm Hg", "SBP >128 mm Hg", "No previous LVEF ≤40%", "Previous LVEF ≤40%"),   HR = c(0.82, 0.82, 0.81, 0.81, 0.81, 0.81, 0.83, 0.78, 0.80, 0.84,  0.82, 0.82, 0.71, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81,  0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81, 0.81),  # 補充4個值使總數達到34   LowerCI = c(0.73, 0.69, 0.67, 0.69, 0.67, 0.67, 0.46, 0.57, 0.65, 0.68,  0.69, 0.72, 0.60, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69,  0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69),  # 補充4個值   UpperCI = c(0.92, 0.96, 0.97, 0.96, 0.97, 0.97, 1.48, 1.07, 0.98, 1.02,  0.96, 0.94, 0.85, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96,  0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96),  # 補充4個值   EventsDapa = c(512, 247, 265, 195, 317, 97, 21, 372, 22, 261, 92, 70, 89,  331, 181, 207, 174, 131, 173, 339, 419, 93, 242, 270, 285,  227, 275, 236, 289, 223, 285, 227, 275, 236),  # 補充4個值   TotalDapa = c(3131, 1545, 1586, 1364, 1767, 630, 81, 2214, 206, 1494, 607,  602, 428, 2314, 817, 1067, 1133, 931, 1555, 1576, 2803, 328,  1730, 1401, 1803, 1327, 1734, 1395, 1516, 1615, 1803, 1327, 1734, 1395),  # 補充4個值   EventsPlac = c(610, 306, 304, 243, 367, 106, 19, 461, 24, 309, 103, 87,  111, 411, 198, 229, 211, 170, 208, 402, 497, 113, 293, 317,  339, 271, 302, 308, 355, 255, 339, 271, 302, 308),  # 補充4個值   TotalPlac = c(3132, 1604, 1528, 1383, 1749, 644, 78, 2225, 185, 1511, 619,  579, 423, 2399, 732, 1049, 1123, 960, 1578, 1553, 2806, 326,  1727, 1405, 1814, 1317, 1736, 1392, 1554, 1577, 1814, 1317, 1736, 1392)  # 補充4個值 ) # 檢查數據結構 str(subgroup_data)

## 'data.frame':    34 obs. of  8 variables: ##  $ Subgroup  : chr  "All patients" "Age ≤72 yr" "Age >72 yr" "Female" ... ##  $ HR        : num  0.82 0.82 0.81 0.81 0.81 0.81 0.83 0.78 0.8 0.84 ... ##  $ LowerCI   : num  0.73 0.69 0.67 0.69 0.67 0.67 0.46 0.57 0.65 0.68 ... ##  $ UpperCI   : num  0.92 0.96 0.97 0.96 0.97 0.97 1.48 1.07 0.98 1.02 ... ##  $ EventsDapa: num  512 247 265 195 317 97 21 372 22 261 ... ##  $ TotalDapa : num  3131 1545 1586 1364 1767 ... ##  $ EventsPlac: num  610 306 304 243 367 106 19 461 24 309 ... ##  $ TotalPlac : num  3132 1604 1528 1383 1749 ...

head(subgroup_data)

##       Subgroup   HR LowerCI UpperCI EventsDapa TotalDapa EventsPlac TotalPlac ## 1 All patients 0.82    0.73    0.92        512      3131        610      3132 ## 2   Age ≤72 yr 0.82    0.69    0.96        247      1545        306      1604 ## 3   Age >72 yr 0.81    0.67    0.97        265      1586        304      1528 ## 4       Female 0.81    0.69    0.96        195      1364        243      1383 ## 5         Male 0.81    0.67    0.97        317      1767        367      1749 ## 6        Asian 0.81    0.67    0.97         97       630        106       644

# 繪制森林圖 ggplot(subgroup_data, aes(x = HR, y = reorder(Subgroup, desc(Subgroup)))) +   geom_point(size = 2) +   geom_errorbarh(aes(xmin = LowerCI, xmax = UpperCI), height = 0.2) +   geom_vline(xintercept = 1, linetype = "dashed") +   geom_text(aes(label = paste0(EventsDapa, "/", TotalDapa, " vs ",                                 EventsPlac, "/", TotalPlac)),              x = 1.5, hjust = 0, size = 3) +   scale_x_continuous(limits = c(0.4, 1.6), breaks = seq(0.4, 1.6, by = 0.2)) +   labs(x = "Hazard Ratio (95% CI)", y = "",         title = "Subgroup Analysis of Treatment Effect") +   theme_minimal() +   theme(panel.grid.major.y = element_blank(),         panel.grid.minor.y = element_blank())

#################文獻森林圖3，系統發育多樣性的森林圖################## # 創建數據框 phylogenetic_data <- data.frame(   Group = c("Acidobacteria", "Actinobacteria", "Bacteroidetes", "Chlamydiae",  "Chloroflexi", "Firmicutes", "Gemmatimonadetes", "Nitrospirae",  "Planctomycetes", "α-Proteobacteria", "β-Proteobacteria",  "γ-Proteobacteria", "δ-Proteobacteria", "Verrucomicrobia",  "Ascomycota", "Basidiomycota", "Mortierellomycota", "AMF",  "Plant pathogen", "Saprotroph", "Cercozoa", "Ciliophora",  "Conosa", "Lobosa", "Ochrophyta", "Consumer", "Phototroph",  "Parasite"),   EffectSize = c(0.2, -0.3, 0.1, -0.4, 0.3, -0.2, 0.5, -0.1, 0.4, -0.3,  0.2, -0.5, 0.1, -0.2, 0.3, -0.4, 0.2, -0.1, 0.4, -0.3,  0.1, -0.2, 0.3, -0.4, 0.2, -0.1, 0.5, -0.3),   Significance = c("P<0.05", "P≥0.05", "P<0.05", "P≥0.05", "P<0.05", "P≥0.05",  "P<0.05", "P≥0.05", "P<0.05", "P≥0.05", "P<0.05", "P≥0.05",  "P<0.05", "P≥0.05", "P<0.05", "P≥0.05", "P<0.05", "P≥0.05",  "P<0.05", "P≥0.05", "P<0.05", "P≥0.05", "P<0.05", "P≥0.05",  "P<0.05", "P≥0.05", "P<0.05", "P≥0.05") ) # 繪制森林圖 ggplot(phylogenetic_data, aes(x = EffectSize, y = reorder(Group, EffectSize),                                fill = Significance)) +   geom_col(aes(fill = Significance), width = 0.7) +   geom_vline(xintercept = 0, linetype = "solid") +   scale_fill_manual(values = c("P<0.05" = "#E41A1C", "P≥0.05" = "#377EB8")) +   scale_x_continuous(limits = c(-0.6, 0.6), breaks = seq(-0.6, 0.6, by = 0.2)) +   labs(x = "Warming Effect Size", y = "Phylogenetic Group",         title = "Phylogenetic Diversity and Relative Abundance") +   theme_minimal() +   theme(panel.grid.major.y = element_blank(),         panel.grid.minor.y = element_blank(),         legend.position = "top")

#################文獻森林圖4，臨床特征的森林圖################## # 創建數據框 clinical_data <- data.frame(   Feature = c("Age", "IDH_status (Mutant)", "MGMT_promoter_status (Wildtype)",  "MGMT_promoter_status (Methylated)", "MGMT_promoter_status (Un-methylated)",  "Stage (WHO II)", "Stage (WHO III)", "Stage (WHO IV)"),   HR = c(1.0, 1.0, 3.0, 1.0, 1.3, 2.2, 2.2, 4.1),   LowerCI = c(1.02, NA, 1.92, NA, 0.95, 1.44, 1.44, 2.39),   UpperCI = c(1.0, NA, 4.7, NA, 1.8, 3.3, 3.3, 6.9),   N = c(674, 428, 237, 477, 163, 248, 265, 160) ) # 繪制森林圖 ggplot(clinical_data, aes(x = HR, y = reorder(Feature, desc(Feature)))) +   geom_point(size = 3) +   geom_errorbarh(aes(xmin = LowerCI, xmax = UpperCI), height = 0.2) +   geom_vline(xintercept = 1, linetype = "dashed") +   geom_text(aes(label = paste0("N=", N)), x = 5, hjust = 0, size = 3.5) +   scale_x_log10(limits = c(0.5, 10), breaks = c(0.5, 1, 2, 3, 4, 5, 10)) +   labs(x = "Hazard Ratio (95% CI)", y = "",         title = "Clinical Features and Hazard Ratios",        subtitle = "Events: 214; Global p-value (Log-Rank): 1.0821e-67\nAIC: 2020.23; Concordance Index: 0.86") +   theme_minimal() +   theme(panel.grid.major.y = element_blank(),         panel.grid.minor.y = element_blank(),         plot.subtitle = element_text(size = 10))

## Warning: Removed 2 rows containing missing values or values outside the scale range ## (`geom_errorbarh()`).

以上代碼完全是不斷向DeepSeek提問、不斷糾正生成，供大家參考！

特別聲明：以上內容(如有圖片或視頻亦包括在內)為自媒體平臺“網易號”用戶上傳并發布，本平臺僅提供信息存儲服務。

Notice: The content above (including the pictures and videos if any) is uploaded and posted by a user of NetEase Hao, which is a social media platform and only provides information storage services.