I will show what I'm doing it in R to make sure if I'm doing it correctly
This is my dataset which I'm using for analysis
dput(df2)
structure(list(TCGA_ID = c("TCGA-AB-2965", "TCGA-AB-2881", "TCGA-AB-2834",
"TCGA-AB-2818", "TCGA-AB-2898", "TCGA-AB-2956", "TCGA-AB-2994",
"TCGA-AB-2920", "TCGA-AB-3009", "TCGA-AB-2892", "TCGA-AB-2814",
"TCGA-AB-2891", "TCGA-AB-2991", "TCGA-AB-2875", "TCGA-AB-2805",
"TCGA-AB-3007", "TCGA-AB-2884", "TCGA-AB-2975", "TCGA-AB-2946",
"TCGA-AB-2932", "TCGA-AB-2979", "TCGA-AB-2917", "TCGA-AB-2828",
"TCGA-AB-2867", "TCGA-AB-2815", "TCGA-AB-2839", "TCGA-AB-2928",
"TCGA-AB-2980", "TCGA-AB-2873", "TCGA-AB-2853", "TCGA-AB-2976",
"TCGA-AB-2877", "TCGA-AB-3001", "TCGA-AB-3012", "TCGA-AB-2940",
"TCGA-AB-2992", "TCGA-AB-2806", "TCGA-AB-2995", "TCGA-AB-2847",
"TCGA-AB-2842", "TCGA-AB-2858", "TCGA-AB-2987", "TCGA-AB-2856",
"TCGA-AB-2916", "TCGA-AB-2901", "TCGA-AB-2844", "TCGA-AB-2808",
"TCGA-AB-2955", "TCGA-AB-2820", "TCGA-AB-2811", "TCGA-AB-2835",
"TCGA-AB-2930", "TCGA-AB-2845", "TCGA-AB-2893", "TCGA-AB-2942",
"TCGA-AB-2921", "TCGA-AB-2988", "TCGA-AB-3002", "TCGA-AB-2925",
"TCGA-AB-2943", "TCGA-AB-2959", "TCGA-AB-2933", "TCGA-AB-2939",
"TCGA-AB-2866", "TCGA-AB-2813", "TCGA-AB-2896", "TCGA-AB-3008",
"TCGA-AB-2950", "TCGA-AB-2819", "TCGA-AB-2895", "TCGA-AB-2830",
"TCGA-AB-2812", "TCGA-AB-2918", "TCGA-AB-2915", "TCGA-AB-2869",
"TCGA-AB-2948", "TCGA-AB-2931", "TCGA-AB-2924", "TCGA-AB-2935",
"TCGA-AB-2836", "TCGA-AB-2970", "TCGA-AB-2900", "TCGA-AB-2936",
"TCGA-AB-2934", "TCGA-AB-2952", "TCGA-AB-2927", "TCGA-AB-2817",
"TCGA-AB-2949", "TCGA-AB-2914", "TCGA-AB-2996", "TCGA-AB-2885",
"TCGA-AB-2882", "TCGA-AB-2825", "TCGA-AB-2823", "TCGA-AB-2888",
"TCGA-AB-2919", "TCGA-AB-2890", "TCGA-AB-2984", "TCGA-AB-2897",
"TCGA-AB-2865", "TCGA-AB-2983", "TCGA-AB-2841"), turqoise_module = c("High",
"Low", "High", "High", "Low", "High", "Low", "High", "Low", "Low",
"High", "High", "Low", "Low", "High", "Low", "High", "Low", "Low",
"Low", "Low", "Low", "Low", "High", "Low", "High", "High", "Low",
"Low", "High", "High", "Low", "Low", "Low", "Low", "Low", "Low",
"Low", "High", "High", "Low", "High", "High", "High", "Low",
"High", "Low", "Low", "High", "High", "Low", "High", "Low", "High",
"Low", "High", "High", "Low", "High", "High", "Low", "High",
"Low", "High", "High", "High", "Low", "Low", "Low", "High", "High",
"High", "High", "High", "High", "Low", "High", "Low", "High",
"High", "High", "High", "Low", "High", "High", "High", "Low",
"Low", "Low", "Low", "High", "Low", "High", "Low", "Low", "Low",
"High", "Low", "Low", "High", "High", "Low"), OS_MONTHS = c(11.3,
29.7, 7.7, 10.2, 36.1, 5.7, 83.5, 11.8, 19, 59.3, 26.3, 21.5,
88.3, 27.7, 18.5, 75.8, 24.8, 34, 24.4, 42.1, 47, 57.3, 99.9,
5.2, 26.3, 16.3, 4, 47.5, 32.7, 3.1, 30, 41.4, 76.2, 86.6, 55.4,
56.3, 30.6, 73.6, 52.7, 0.3, 19.2, 6.3, 5.3, 45.3, 10.5, 3.9,
118.1, 16.4, 0.3, 8.2, 77.3, 7.1, 9.3, 6.6, 43.5, 8.1, 0.8, 46.8,
7.9, 4.2, 15.4, 4.6, 36.9, 5.5, 1.3, 7.5, 27, 40.3, 95.6, 5.7,
8.1, 11.5, 7.4, 0.5, 27.1, 18.1, 0.1, 26, 1.6, 17, 10.7, 6.3,
13.8, 6.6, 1.9, 2.4, 9.3, 32.6, 48.3, 73, 7, 11, 7.5, 0.2, 33.5,
26.8, 0.5, 71.3, 30.5, 2.3, 11.2, 46.5), Status = c(1L, 0L, 1L,
1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L,
1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L,
1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L,
0L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L,
1L, 1L, 1L)), row.names = c(NA, -102L), class = "data.frame")
Where the turqoise_module was obtained based on the a signature of 31 genes which I was able to get based on the explanation here
Now what I do next is this
fit1 = survfit(Surv(OS_MONTHS, Status)~ turqoise_module, data=df2)
fit1
out1 = survdiff(Surv(OS_MONTHS, Status)~ turqoise_module, data=df2)
out1
The output of the above model fitting is this
fit1
Call: survfit(formula = Surv(OS_MONTHS, Status) ~ turqoise_module,
data = df2)
n events median 0.95LCL 0.95UCL
turqoise_module=High 51 48 7.1 5.7 8.2
turqoise_module=Low 51 17 NA 55.4 NA
> out1
Call:
survdiff(formula = Surv(OS_MONTHS, Status) ~ turqoise_module,
data = df2)
N Observed Expected (O-E)^2/E (O-E)^2/V
turqoise_module=High 51 48 18.4 47.7 74.7
turqoise_module=Low 51 17 46.6 18.8 74.7
Chisq= 74.7 on 1 degrees of freedom, p= <2e-16
When i plot them using this code
ggsurvplot(fit1,
pval = TRUE, conf.int = TRUE,data = df2,
risk.table = TRUE, # Add risk table
risk.table.col = "strata", # Change risk table color by groups
linetype = "strata", # Change line type by groups
surv.median.line = "hv", # Specify median survival
ggtheme = theme_bw(base_size = fsize),
palette = c("#990000", "#000099","green","black"))
Now to obtain the turquoise_module based on which I categorized high or low I have used this code
df1 <- x_te[,gene]
df1 <- as.data.frame(df1)
df1$LSC22score <- rowSums(
sweep(x = df1,
MARGIN = 2,
STATS = beta1,
FUN = *)
)
df1 <- df1 %>% dplyr::mutate(turqoise_module =
dplyr::case_when(
LSC22score > median(LSC22score) ~ "High",
LSC22score < median(LSC22score) ~ "Low"
))
Now my question is when I see the out1 output and the figure generate there is bit of confusion due to my lack of conceptual clarity.
**
The expected number of deaths in the high turqoise module group is 18.4, while the observed number of deaths is 48, indicating that there are more deaths in this group than expected. The opposite is true for the low turqoise module group, where the observed number of deaths is 17 while the expected number is 46.6, suggesting that there are fewer deaths in this group than expected.
**
But when I see the figure the I thought patient categorized as low are surviving for longer period of time.
Any suggestion or help would be really appreciated how to interpret or where am I going wrong

out1output which is thesurvdiffoutput which I have quoted in the expected vs observed and the graph where i see the turqoise_low are have more survival i was not able to understand the numbers are risk shown in ggsurvplot – PesKchan Apr 08 '23 at 06:56