Jovialis
Advisor
- Messages
- 9,888
- Reaction score
- 6,794
- Points
- 113
- Ethnic group
- Italian
- Y-DNA haplogroup
- R1b-PF7566>Y227216
- mtDNA haplogroup
- H6a1b7
I used ChatGPT to help craft a script that streamlines testing multiple samples to see how well they fit within an existing population model. This approach makes it easier to organize and assess whether a particular sample or group of samples works in combination with another set you're interested in comparing.
For instance, in the example below, I'm testing Steppe_EMBA and Italy_Mesolithic.SG against a range of Neolithic to Early Bronze Age Aegean samples:
For instance, in the example below, I'm testing Steppe_EMBA and Italy_Mesolithic.SG against a range of Neolithic to Early Bronze Age Aegean samples:
Code:
# Sarno et al. 2021 R-script CHG & Iran_N combined Outgroup Version
# Define paths for dataset
prefix = "D:\\Bioinformatics\\01_Admixtools_Dataset\\v54.1.p1_HO_Jovialis_Plink\\v54.1.p1_HO_Jovialis"
my_f2_dir = "D:\\Bioinformatics\\my_f2_dir_Jovialis"
# Load necessary libraries
library(admixtools)
library(tidyverse)
# Define target population
target = c('Jovialis') # Update this if your sample has a different name in the merged dataset
# Define right populations
right = c('Mota', 'Ust_Ishim', 'Kostenki14', 'MA1_HG', 'Goyet', 'ElMiron',
'Vestonice16', 'Villabruna', 'EHG', 'Levant_N', 'Anatolia_N',
'WHG', 'CHG-Iran_N', 'Natufian')
# Generate f2 stats
mypops = c(right, target)
extract_f2(prefix, my_f2_dir, pops = mypops, overwrite = TRUE, maxmiss = 1)
f2_blocks = f2_from_precomp(my_f2_dir, pops = mypops, afprod = TRUE)
# Define the list of optional populations for each set
optional_pops = c(
'C_Italian_N', 'Italy_Sicily_EN', 'Italy_Sicily_N_Stentinello', 'Italy_Sicily_MN', 'Greece_NeaNikomedeia_EN.SG', 'Greece_N', 'Greece_Peloponnese_N', 'Greece_LN_2.SG', 'Greece_Minoan_Lassithi', 'Greece_Minoan_Odigitria', 'Greece_Minoan_Kephala_Petras.SG', 'Greece_Koufonisi_Cycladic_EBA.SG', 'I2683-Anatolia_EBA_Isparta'
)
# Define the sets
sets = list(
list(must_use = c('Steppe_EMBA', 'Italy_Mesolithic.SG'))
)
# Initialize list to store all results
all_results_list = list()
# Function to create combinations and run qpadm
run_qpadm_for_set <- function(set_must_use, optional_pops, prefix, right, target) {
results_list <- list() # Initialize an empty list to store results within the function
for (pop in optional_pops) {
left_set = c(set_must_use, pop)
# Run qpadm model
results = qpadm(prefix, left_set, right, target, allsnps = TRUE)
# Store results in the list
results_list[[length(results_list) + 1]] <- list(left = left_set, weights = results$weights, popdrop = results$popdrop)
# Print summary of results
cat("Testing Left Set:", paste(left_set, collapse = ", "), "\n")
cat("Weights:\n")
print(results$weights)
cat("Popdrop Analysis:\n")
print(results$popdrop)
cat("----------------------------------------------------\n")
}
return(results_list)
}
# Iterate over each set and run qpadm for all combinations
for (i in 1:length(sets)) {
cat("Running Set", i, "\n")
set_results <- run_qpadm_for_set(sets[[i]]$must_use, optional_pops, prefix, right, target)
# Append the results of the current set to the overall results list
all_results_list[[i]] <- set_results
}
# all_results_list now contains results from all sets and combinations
[code/]