# calculations
library(geomorph) # GPA
library(Morpho) # LDA
library(stats) # MANOVA
library(IdentiFlyR) # classification
library(reshape2) # melt
library(stringr) # str_split_fixed
library(emmeans) # emmeans
# plotting and visualization
library(ggplot2) # plots
ggplot2::theme_set(theme_light())
library(rnaturalearth) # maps
library(terra) # raster maps
library(sf) # st_transform
library(ggpubr) # ggarange
library(ggforce) # geom_arc_barAnalysis of honey bee wing shape in northwestern Europe
Libraries
Variable names
p <- 19 # number of landmarks
k <- 2 # number of dimensions, in this case 2 for coordinates (x, y)
# create coordinates names used by IdentiFly
nameXy <- c("x1", "y1")
for (i in 2:p) {
nameXy <- c(nameXy, paste0("x", i))
nameXy <- c(nameXy, paste0("y", i))
}
nameXy [1] "x1" "y1" "x2" "y2" "x3" "y3" "x4" "y4" "x5" "y5" "x6" "y6"
[13] "x7" "y7" "x8" "y8" "x9" "y9" "x10" "y10" "x11" "y11" "x12" "y12"
[25] "x13" "y13" "x14" "y14" "x15" "y15" "x16" "y16" "x17" "y17" "x18" "y18"
[37] "x19" "y19"
# The number of principal components used is 2*p-4 = 34, which is equal to the
# degrees of freedom
namePc <- paste0("PC", 1:(2 * p - 4))
namePc [1] "PC1" "PC2" "PC3" "PC4" "PC5" "PC6" "PC7" "PC8" "PC9" "PC10"
[11] "PC11" "PC12" "PC13" "PC14" "PC15" "PC16" "PC17" "PC18" "PC19" "PC20"
[21] "PC21" "PC22" "PC23" "PC24" "PC25" "PC26" "PC27" "PC28" "PC29" "PC30"
[31] "PC31" "PC32" "PC33" "PC34"
# define which landmarks are connected by lines in wireframe graph
linkX <- c(1, 1, 2, 2, 3, 3, 4, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 11, 11, 12, 13, 14,
15, 16, 17)
linkY <- c(2, 3, 4, 5, 6, 19, 6, 10, 12, 8, 8, 14, 19, 9, 10, 15, 11, 12, 16, 13,
18, 15, 16, 17, 18)
linkApis <- cbind(linkX, linkY)Read data
New data collected for this study are available at Zenodo (Machlowska et al., 2026). There are use also data published during earlier studies (Oleksa et al., 2022; Tofilski, 2025)
dataRaw <- read.csv("https://zenodo.org/records/18845767/files/BY-raw-coordinates.csv")
dataRaw <- rbind(dataRaw, read.csv("https://zenodo.org/records/18845767/files/DE-raw-coordinates.csv"))
dataRaw <- rbind(dataRaw, read.csv("https://zenodo.org/records/18845767/files/ES_517_573-raw-coordinates.csv"))
dataRaw <- rbind(dataRaw, read.csv("https://zenodo.org/records/18845767/files/FR-raw-coordinates.csv"))
dataRaw <- rbind(dataRaw, read.csv("https://zenodo.org/records/18845767/files/GB-raw-coordinates.csv"))
dataRaw <- rbind(dataRaw, read.csv("https://zenodo.org/records/18845767/files/IE-raw-coordinates.csv"))
dataRaw <- rbind(dataRaw, read.csv("https://zenodo.org/records/18845767/files/LT-raw-coordinates.csv"))
dataRaw <- rbind(dataRaw, read.csv("https://zenodo.org/records/18845767/files/NL-raw-coordinates.csv"))
dataRaw <- rbind(dataRaw, read.csv("https://zenodo.org/records/18845767/files/NO-raw-coordinates.csv"))
dataRaw <- rbind(dataRaw, read.csv("https://zenodo.org/records/18845767/files/PL_254_922-raw-coordinates.csv"))
dataRaw <- rbind(dataRaw, read.csv("https://zenodo.org/records/7244070/files/ES-raw-coordinates.csv"))
dataRaw <- rbind(dataRaw, read.csv("https://zenodo.org/records/7244070/files/PL-raw-coordinates.csv"))
dataRaw <- rbind(dataRaw, read.csv("https://zenodo.org/records/7244070/files/PT-raw-coordinates.csv"))
dataRaw <- rbind(dataRaw, read.csv("https://zenodo.org/records/17621032/files/CU-raw-coordinates.csv"))
# Read geographic coordinates
dataGeo <- read.csv("https://zenodo.org/records/18845767/files/BY-data.csv")
dataGeo <- rbind(dataGeo, read.csv("https://zenodo.org/records/18845767/files/DE-data.csv"))
dataGeo <- rbind(dataGeo, read.csv("https://zenodo.org/records/18845767/files/ES_517_573-data.csv"))
dataGeo <- rbind(dataGeo, read.csv("https://zenodo.org/records/18845767/files/FR-data.csv"))
dataGeo <- rbind(dataGeo, read.csv("https://zenodo.org/records/18845767/files/GB-data.csv"))
dataGeo <- rbind(dataGeo, read.csv("https://zenodo.org/records/18845767/files/IE-data.csv"))
dataGeo <- rbind(dataGeo, read.csv("https://zenodo.org/records/18845767/files/LT-data.csv"))
dataGeo <- rbind(dataGeo, read.csv("https://zenodo.org/records/18845767/files/NL-data.csv"))
dataGeo <- rbind(dataGeo, read.csv("https://zenodo.org/records/18845767/files/NO-data.csv"))
dataGeo <- rbind(dataGeo, read.csv("https://zenodo.org/records/18845767/files/PL_254_922-data.csv"))
geoDataNew <- dataGeo
# number of workers per sample in new datasets
nColony <- table(geoDataNew$sample)
nColony <- data.frame(nColony, row.names = 1) # move column 1 to row names
ggplot(nColony, aes(x = Freq)) + geom_histogram(binwidth = 1)summary(nColony$Freq) Min. 1st Qu. Median Mean 3rd Qu. Max.
5.00 12.00 21.00 21.64 28.00 69.00
dataEU <- read.csv("https://zenodo.org/record/7244070/files/EU-geo-data.csv")
countryEU <- substr(dataEU$sample, 1, 2)
dataEU <- dataEU[countryEU %in% c("ES", "PL", "PT"), ]
dataEU$date <- as.numeric(substr(dataEU$date, 1, 4))
dataGeo <- rbind(dataGeo, dataEU)
dataGeo <- rbind(dataGeo, read.csv("https://zenodo.org/records/17621032/files/CU-data.csv"))
dataGeoSample <- aggregate(dataGeo[c("latitude", "longitude", "date")], by = list(dataGeo$sample),
FUN = mean)
dataGeoSample <- data.frame(dataGeoSample, row.names = 1) # move column 1 to row names
dataRaw$ISO <- substr(dataRaw$file, 1, 2)
dataGeoSample$ISO <- substr(rownames(dataGeoSample), 1, 2)
dataGeoSample$ISO <- as.factor(dataGeoSample$ISO)
dataGeoSample$country <- dataGeoSample$ISO
dataGeoSample$country <- gsub("BY", "Belarus", dataGeoSample$country)
dataGeoSample$country <- gsub("CU", "Cuba", dataGeoSample$country)
dataGeoSample$country <- gsub("DE", "Germany", dataGeoSample$country)
dataGeoSample$country <- gsub("ES", "Spain", dataGeoSample$country)
dataGeoSample$country <- gsub("FR", "France", dataGeoSample$country)
dataGeoSample$country <- gsub("GB", "UK", dataGeoSample$country)
dataGeoSample$country <- gsub("IE", "Ireland", dataGeoSample$country)
dataGeoSample$country <- gsub("LT", "Lithuania", dataGeoSample$country)
dataGeoSample$country <- gsub("NL", "Netherlands", dataGeoSample$country)
dataGeoSample$country <- gsub("NO", "Norway", dataGeoSample$country)
dataGeoSample$country <- gsub("PL", "Poland", dataGeoSample$country)
dataGeoSample$country <- gsub("PT", "Portugal", dataGeoSample$country)
# number of samples per location
location <- paste(round(dataGeoSample$latitude, 1), round(dataGeoSample$longitude,
1))
nLocation <- table(location)
nLocation <- as.data.frame(nLocation)
locationLatLon <- str_split_fixed(nLocation$location, " ", 2)
colnames(locationLatLon) <- c("latitude", "longitude")
locationLatLon <- apply(locationLatLon, 2, as.numeric)
locationLatLon <- as.data.frame(locationLatLon)
nLocation <- cbind(nLocation, locationLatLon)
location <- paste(round(dataGeoSample$latitude, 1), round(dataGeoSample$longitude,
1))
nLocation <- table(location)
nLocation <- as.data.frame(nLocation)
locationLatLon <- str_split_fixed(nLocation$location, " ", 2)
colnames(locationLatLon) <- c("latitude", "longitude")
locationLatLon <- apply(locationLatLon, 2, as.numeric)
locationLatLon <- as.data.frame(locationLatLon)
nLocation <- cbind(nLocation, locationLatLon)
# average data for country
dataGeoCountry <- aggregate(dataGeoSample[c("latitude", "longitude")], by = list(dataGeoSample$ISO),
FUN = mean)
country <- aggregate(dataGeoSample[c("country")], by = list(dataGeoSample$ISO), FUN = unique)
dataGeoCountry <- merge(dataGeoCountry, country, by = "Group.1")
colnames(dataGeoCountry)[1] <- "ISO"Map
# Read elevation data In order to download the TIF file uncomment the two lines
# below
# #download.file('https://secure-web.cisco.com/15MbNLDNcvKRdwHz0C8BuWIBGjqhNz-VGOCGhfT0ba5LztPXbyrwb6epoX-FTjajgD8ZFDd0GJRkhcWz3V-WwLn7u53iy9KDz55qixlb5sECZRIKC-2tSg0tc_ZY2hq9U0S10DGheC9vVDLKudF18BT9aItv7zRwYlAnC_SQ2ODKtG9EhyJxY4GV6kntF7KVOTyBn0eT9PmiW_EJvCBFchY50GZBD6Ul9wyCVarVhNMTz91dmugWBBEpPb0St3lArbpanzUMmb4xrnZtX1Ve4AW4V9Px-yU6hq3_xT-TnHlbpX-oILMRTq8lJlFRDLUMMzJdYdy_64T_9KQCiP1M9qTPrJjl3rcIo5lZJoHawqz1DUYYKxe-u4oyQK77hLQL4OaxSZXWylQf-rly0woYPMDTW0YixWICqzpMBof0YhX8/https%3A%2F%2Fgeodata.ucdavis.edu%2Fclimate%2Fworldclim%2F2_1%2Fbase%2Fwc2.1_2.5m_elev.zip',
# 'wc2.1_2.5m_elev.zip') unzip('wc2.1_2.5m_elev.zip')
# or use your local file
elev <- rast("D:/WorldClim/wc2.1_2.5m_elev.tif")
colorElev <- colorRampPalette(c("#f7f7f7", "#d9d9d9", "#bdbdbd", "#636363", "#303030"),
bias = 2)
xMin <- -10
xMax <- 40
yMin <- 35
yMax <- 65
# Define extent and crop (terra)
e <- ext(xMin - 5, xMax + 5, yMin - 5, yMax + 5)
elev <- crop(elev, e)
elevDf <- as.data.frame(elev, xy = TRUE, na.rm = TRUE)
colnames(elevDf) <- c("longitude", "latitude", "altitude")
dataMap <- ne_countries(scale = "medium", returnclass = "sf")
n <- length(unique(dataGeoSample$country))
ggplot(data = dataMap) + geom_sf() + coord_sf(xlim = c(xMin, xMax), ylim = c(yMin,
yMax)) + geom_jitter(data = dataGeoSample, aes(x = longitude, y = latitude, color = country,
shape = country), width = 0.5, height = 0.5) + scale_shape_manual(name = "",
values = 1:n) + scale_color_manual(name = "", values = rainbow(n))# Figure 1
ggplot(data = dataMap) + geom_raster(data = elevDf, aes(x = longitude, y = latitude,
fill = altitude)) + scale_fill_gradientn(colours = colorElev(100)) + geom_sf(fill = NA) +
coord_sf(xlim = c(xMin, xMax), ylim = c(yMin, yMax)) + geom_point(data = nLocation,
aes(x = longitude, y = latitude, size = Freq)) + scale_size_binned_area(name = "colonies",
limits = c(0, 100), breaks = c(0, 5, 10, 20, 40)) + theme(legend.position = "right")# only new samples
geoDataNewSample <- aggregate(geoDataNew[c("latitude", "longitude", "date")], by = list(geoDataNew$sample),
FUN = mean)
geoDataNewSample <- data.frame(geoDataNewSample, row.names = 1)
geoDataNewSample$ISO <- substr(rownames(geoDataNewSample), 1, 2)
ggplot(data = dataMap) + geom_sf() + coord_sf(xlim = c(xMin, xMax), ylim = c(yMin,
yMax)) + geom_jitter(data = geoDataNewSample, aes(x = longitude, y = latitude,
color = ISO, shape = ISO), width = 0.5, height = 0.5) + scale_shape_manual(name = "",
values = 1:n) + scale_color_manual(name = "", values = rainbow(n))GPA alignment
# Convert from 2D array to 3D array
wing3D <- arrayspecs(dataRaw[, nameXy], p, k)
dimnames(wing3D)[[3]] <- dataRaw$file
# Align the coordinates using Generalized Procrustes Analysis
GPA <- gpagen(wing3D, print.progress = FALSE)
# Convert from 3D array to 2D array
dataAligned <- two.d.array(GPA$coords)
# rename column names from 1.X, 1.Y to x1, y1 ...
colnames(dataAligned) <- gsub("([0-9]+)\\.([XY])", "\\L\\2\\1", colnames(dataAligned),
perl = TRUE)
# Average coorinates within samples
dataRaw$sample <- substr(dataRaw$file, 1, 7)
dataSample <- aggregate(dataAligned, by = list(dataRaw$sample), FUN = mean)
dataSample <- data.frame(dataSample, row.names = 1) # move column 1 to row names
dataSample <- merge(dataSample, dataGeoSample, by = 0)
dataSample <- data.frame(dataSample, row.names = 1) # move column 1 to row namesSample size
nrow(geoDataNew) # number of new wings[1] 29043
geoDataNew$ISO <- substr(geoDataNew$file, 1, 2)
table(geoDataNew$ISO) # number of new wings per country
BY DE ES FR GB IE LT NL NO PL
171 60 919 164 425 3498 1358 3555 2274 16619
nrow(geoDataNewSample) # number of new samples[1] 1342
table(geoDataNewSample$ISO) # number of new samples per country
BY DE ES FR GB IE LT NL NO PL
5 4 57 11 29 304 76 125 84 647
nrow(dataRaw) # number of all wings[1] 38970
table(dataRaw$ISO) # number of all wings per country
BY CU DE ES FR GB IE LT NL NO PL PT
171 449 60 3482 164 425 3498 1358 3555 2274 22574 960
nrow(dataGeoSample) # number of all samples[1] 2312
table(dataGeoSample$ISO) # number of all samples per country
BY CU DE ES FR GB IE LT NL NO PL PT
5 9 4 573 11 29 304 76 125 84 900 192
PCA
PCA <- prcomp(dataSample[, nameXy], scale. = TRUE)
PCscores <- as.data.frame(PCA$x[, namePc])
PCscores$country <- as.factor(dataSample$country)
# create plot labels
variance.tab <- summary(PCA)$importance
variance <- variance.tab["Proportion of Variance", "PC1"]
variance <- round(100 * variance, 1)
xLabel <- paste0("PC1 (", variance, "%)")
variance <- variance.tab["Proportion of Variance", "PC2"]
variance <- round(100 * variance, 1)
yLabel <- paste0("PC2 (", variance, "%)")
n <- length(unique(PCscores$country))
figPca <- ggplot(PCscores, aes(x = PC1, y = PC2, shape = country, color = country)) +
geom_point() + scale_shape_manual(name = "", values = c(1:n)) + scale_color_manual(name = "",
values = rainbow(n)) + stat_ellipse() + xlab(xLabel) + ylab(yLabel)
figPcaLDA
# use equal prior probability for all groups
n.gr <- length(unique(PCscores$country)) # number of groups
lda <- CVA(PCscores[namePc], PCscores$country, rounds = 10000, cv = TRUE, prior = rep(1/n.gr,
n.gr))
ldaScores <- as.data.frame(lda$CVscores)
colnames(ldaScores) <- gsub("CV ", "LD", colnames(ldaScores))
# ldaScores <- cbind(sample.dataGeo, ldaScores)
ldaScores$country <- PCscores$country
ldaScores$ISO <- dataGeoSample$ISO
figLda <- ggplot(ldaScores, aes(x = LD1, y = LD2, shape = country, color = country)) +
geom_point() + scale_shape_manual(name = "", values = c(1:n)) + scale_color_manual(name = "",
values = rainbow(n)) + stat_ellipse()
figLdaggplot(ldaScores, aes(x = LD1, y = LD3, shape = country, color = country)) + geom_point() +
scale_shape_manual(name = "", values = c(1:n)) + scale_color_manual(name = "",
values = rainbow(n)) + stat_ellipse()# Figure 3
ggarrange(figPca, figLda, labels = c("a", "b"), font.label = list(size = 12, face = "bold"),
ncol = 2, nrow = 1, common.legend = TRUE, legend = "bottom")# Confusion matrix Classification of the dataSample to groups
ldaclass <- typprobClass(lda$CVscores, groups = ldaScores$ISO, outlier = 0)
print(ldaclass) cross-validated classification results in frequencies
BY CU DE ES FR GB IE LT NL NO PL PT
BY 5 0 0 0 0 0 0 0 0 0 0 0
CU 0 9 0 0 0 0 0 0 0 0 0 0
DE 0 0 4 0 0 0 0 0 0 0 0 0
ES 3 0 0 440 0 0 2 0 0 1 1 126
FR 2 0 0 0 9 0 0 0 0 0 0 0
GB 0 0 0 0 0 29 0 0 0 0 0 0
IE 0 0 0 0 2 0 283 18 0 0 1 0
LT 0 0 0 0 2 0 6 62 0 0 6 0
NL 0 0 0 0 0 0 0 0 122 0 3 0
NO 0 0 0 0 0 0 0 0 0 84 0 0
PL 17 1 2 0 1 0 0 0 1 10 867 1
PT 0 0 0 47 0 0 0 0 0 0 0 145
cross-validated classification result in %
BY CU DE ES FR GB IE
BY 100.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
CU 0.00000 100.00000 0.00000 0.00000 0.00000 0.00000 0.00000
DE 0.00000 0.00000 100.00000 0.00000 0.00000 0.00000 0.00000
ES 0.52356 0.00000 0.00000 76.78883 0.00000 0.00000 0.34904
FR 18.18182 0.00000 0.00000 0.00000 81.81818 0.00000 0.00000
GB 0.00000 0.00000 0.00000 0.00000 0.00000 100.00000 0.00000
IE 0.00000 0.00000 0.00000 0.00000 0.65789 0.00000 93.09211
LT 0.00000 0.00000 0.00000 0.00000 2.63158 0.00000 7.89474
NL 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
NO 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
PL 1.88889 0.11111 0.22222 0.00000 0.11111 0.00000 0.00000
PT 0.00000 0.00000 0.00000 24.47917 0.00000 0.00000 0.00000
LT NL NO PL PT
BY 0.00000 0.00000 0.00000 0.00000 0.00000
CU 0.00000 0.00000 0.00000 0.00000 0.00000
DE 0.00000 0.00000 0.00000 0.00000 0.00000
ES 0.00000 0.00000 0.17452 0.17452 21.98953
FR 0.00000 0.00000 0.00000 0.00000 0.00000
GB 0.00000 0.00000 0.00000 0.00000 0.00000
IE 5.92105 0.00000 0.00000 0.32895 0.00000
LT 81.57895 0.00000 0.00000 7.89474 0.00000
NL 0.00000 97.60000 0.00000 2.40000 0.00000
NO 0.00000 0.00000 100.00000 0.00000 0.00000
PL 0.00000 0.11111 1.11111 96.33333 0.11111
PT 0.00000 0.00000 0.00000 0.00000 75.52083
overall classification accuracy: 89.05709 %
Kappa statistic: 0.8575
# Mahalanobis distances between groups
knitr::kable(as.data.frame(as.matrix(lda$Dist$GroupdistMaha)), digits = 3)| Belarus | Cuba | France | Germany | Ireland | Lithuania | Netherlands | Norway | Poland | Portugal | Spain | UK | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Belarus | 0.000 | 6.678 | 6.397 | 6.699 | 6.473 | 7.036 | 6.756 | 5.493 | 4.738 | 7.210 | 6.836 | 8.376 |
| Cuba | 6.678 | 0.000 | 9.098 | 9.360 | 7.963 | 9.459 | 8.179 | 7.411 | 6.531 | 8.915 | 8.715 | 10.034 |
| France | 6.397 | 9.098 | 0.000 | 5.514 | 7.008 | 6.871 | 8.205 | 6.244 | 6.100 | 8.629 | 7.939 | 9.504 |
| Germany | 6.699 | 9.360 | 5.514 | 0.000 | 7.987 | 7.720 | 9.283 | 8.115 | 5.742 | 9.560 | 9.208 | 9.815 |
| Ireland | 6.473 | 7.963 | 7.008 | 7.987 | 0.000 | 3.778 | 8.752 | 6.579 | 5.137 | 7.205 | 6.616 | 8.580 |
| Lithuania | 7.036 | 9.459 | 6.871 | 7.720 | 3.778 | 0.000 | 9.361 | 7.585 | 6.338 | 8.249 | 7.679 | 9.190 |
| Netherlands | 6.756 | 8.179 | 8.205 | 9.283 | 8.752 | 9.361 | 0.000 | 6.684 | 6.553 | 8.067 | 8.113 | 9.289 |
| Norway | 5.493 | 7.411 | 6.244 | 8.115 | 6.579 | 7.585 | 6.684 | 0.000 | 5.049 | 7.199 | 6.908 | 7.212 |
| Poland | 4.738 | 6.531 | 6.100 | 5.742 | 5.137 | 6.338 | 6.553 | 5.049 | 0.000 | 6.447 | 6.332 | 7.289 |
| Portugal | 7.210 | 8.915 | 8.629 | 9.560 | 7.205 | 8.249 | 8.067 | 7.199 | 6.447 | 0.000 | 1.676 | 9.079 |
| Spain | 6.836 | 8.715 | 7.939 | 9.208 | 6.616 | 7.679 | 8.113 | 6.908 | 6.332 | 1.676 | 0.000 | 8.984 |
| UK | 8.376 | 10.034 | 9.504 | 9.815 | 8.580 | 9.190 | 9.289 | 7.212 | 7.289 | 9.079 | 8.984 | 0.000 |
# pairwise probabilities of differences between groups
knitr::kable(as.data.frame(as.matrix(lda$Dist$probsMaha)), digits = 5)| Belarus | Cuba | France | Germany | Ireland | Lithuania | Netherlands | Norway | Poland | Portugal | Spain | UK | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Belarus | 0.0000 | 6e-04 | 0.0005 | 0.0134 | 1e-04 | 1e-04 | 1e-04 | 7e-04 | 0.0053 | 1e-04 | 1e-04 | 1e-04 |
| Cuba | 0.0006 | 0e+00 | 0.0001 | 0.0001 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 0.0001 | 1e-04 | 1e-04 | 1e-04 |
| France | 0.0005 | 1e-04 | 0.0000 | 0.0302 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 0.0001 | 1e-04 | 1e-04 | 1e-04 |
| Germany | 0.0134 | 1e-04 | 0.0302 | 0.0000 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 0.0014 | 1e-04 | 1e-04 | 1e-04 |
| Ireland | 0.0001 | 1e-04 | 0.0001 | 0.0001 | 0e+00 | 1e-04 | 1e-04 | 1e-04 | 0.0001 | 1e-04 | 1e-04 | 1e-04 |
| Lithuania | 0.0001 | 1e-04 | 0.0001 | 0.0001 | 1e-04 | 0e+00 | 1e-04 | 1e-04 | 0.0001 | 1e-04 | 1e-04 | 1e-04 |
| Netherlands | 0.0001 | 1e-04 | 0.0001 | 0.0001 | 1e-04 | 1e-04 | 0e+00 | 1e-04 | 0.0001 | 1e-04 | 1e-04 | 1e-04 |
| Norway | 0.0007 | 1e-04 | 0.0001 | 0.0001 | 1e-04 | 1e-04 | 1e-04 | 0e+00 | 0.0001 | 1e-04 | 1e-04 | 1e-04 |
| Poland | 0.0053 | 1e-04 | 0.0001 | 0.0014 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 0.0000 | 1e-04 | 1e-04 | 1e-04 |
| Portugal | 0.0001 | 1e-04 | 0.0001 | 0.0001 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 0.0001 | 0e+00 | 1e-04 | 1e-04 |
| Spain | 0.0001 | 1e-04 | 0.0001 | 0.0001 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 0.0001 | 1e-04 | 0e+00 | 1e-04 |
| UK | 0.0001 | 1e-04 | 0.0001 | 0.0001 | 1e-04 | 1e-04 | 1e-04 | 1e-04 | 0.0001 | 1e-04 | 1e-04 | 0e+00 |
# wing shape differs significantely between countries
MANOVA <- manova(as.matrix(cbind(PCscores[, namePc])) ~ country, PCscores)
summary(MANOVA) Df Pillai approx F num Df den Df Pr(>F)
country 11 3.7993 35.335 374 25047 < 2.2e-16 ***
Residuals 2300
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Classification as lineages
Comparison with lineages dataset (Nawrocka, Kandemir, Fuchs, & Tofilski, 2018a, 2018b)
idData <- xml2gmLdaData("https://zenodo.org/records/14054009/files/apis-mellifera-lineage.dw.xml")
idM <- gmLdaData2id(idData, dataSample[, nameXy], average = FALSE)
idM$plottab <- table(idM$id$group)
tab
A C M O
636 184 1473 19
prop.table(tab) # proportion of colonies classified as each of the lineages
A C M O
0.275086505 0.079584775 0.637110727 0.008217993
tab <- table(idM$id$group, dataSample$ISO)
tab
BY CU DE ES FR GB IE LT NL NO PL PT
A 5 2 0 53 3 10 60 36 31 17 407 12
C 0 0 4 0 6 0 1 5 2 0 166 0
M 0 7 0 520 1 19 242 28 92 67 317 180
O 0 0 0 0 1 0 1 7 0 0 10 0
round(prop.table(tab, margin = 2), digits = 3)
BY CU DE ES FR GB IE LT NL NO PL PT
A 1.000 0.222 0.000 0.092 0.273 0.345 0.197 0.474 0.248 0.202 0.452 0.062
C 0.000 0.000 1.000 0.000 0.545 0.000 0.003 0.066 0.016 0.000 0.184 0.000
M 0.000 0.778 0.000 0.908 0.091 0.655 0.796 0.368 0.736 0.798 0.352 0.938
O 0.000 0.000 0.000 0.000 0.091 0.000 0.003 0.092 0.000 0.000 0.011 0.000
pTab <- as.data.frame(idM$P)
sums <- rowSums(pTab)
pTab <- pTab/sums
pTabLong <- pTab
pTabLong$sample <- rownames(pTabLong)
pTabLong <- melt(pTabLong, id.vars = c("sample"))
pTabLong$gr <- substr(pTabLong$sample, 1, 2) # short 2 letter name
ggplot(pTabLong, aes(sample, value, fill = variable)) + geom_col(width = 1) + scale_fill_manual(name = "classified as lineage",
values = rainbow(4)) + facet_grid(~gr, switch = "x", scales = "free", space = "free") +
theme_minimal() + labs(x = NULL, y = NULL) + scale_y_continuous(expand = c(0,
0)) + guides(fill = guide_legend(nrow = 1, byrow = TRUE)) + theme(panel.spacing.x = unit(0.2,
"lines"), axis.text.x = element_blank(), legend.position = "top")countrySmallN <- c("Belarus", "Cuba", "Germany", "France", "UK")
# countries with large N
pTabLong <- pTab
pTabLong <- pTabLong[!dataSample$country %in% countrySmallN, ]
pTabLong$sample <- rownames(pTabLong)
pTabLong <- melt(pTabLong, id.vars = c("sample"))
pTabLong$gr <- substr(pTabLong$sample, 1, 2) # short 2 letter name
figAdmA <- ggplot(pTabLong, aes(sample, value, fill = variable)) + geom_col(width = 1) +
scale_fill_manual(name = "classified as lineage", values = rainbow(4)) + facet_grid(~gr,
switch = "x", scales = "free", space = "free") + theme_minimal() + labs(x = NULL,
y = NULL) + scale_y_continuous(expand = c(0, 0)) + guides(fill = guide_legend(nrow = 1,
byrow = TRUE)) + theme(panel.spacing.x = unit(0.2, "lines"), axis.text.x = element_blank(),
legend.position = "top")
# countries with small N
pTabLong <- pTab
pTabLong <- pTabLong[dataSample$country %in% countrySmallN, ]
pTabLong$sample <- rownames(pTabLong)
pTabLong <- melt(pTabLong, id.vars = c("sample"))
pTabLong$gr <- substr(pTabLong$sample, 1, 2) # short 2 letter name
figAdmB <- ggplot(pTabLong, aes(sample, value, fill = variable)) + geom_col(width = 1) +
scale_fill_manual(name = "classified as lineage", values = rainbow(4)) + facet_grid(~gr,
switch = "x", scales = "free", space = "free") + theme_minimal() + labs(x = NULL,
y = NULL) + scale_y_continuous(expand = c(0, 0)) + guides(fill = guide_legend(nrow = 1,
byrow = TRUE)) + theme(panel.spacing.x = unit(0.2, "lines"), axis.text.x = element_blank(),
legend.position = "top") + theme(axis.text.y = element_blank())
figAdm <- ggarrange(figAdmA, figAdmB, ncol = 2, nrow = 1, widths = c(0.66, 0.33),
common.legend = TRUE, legend = "top")
dataGeoSample$lineage <- idM$id$group
ggplot(data = dataMap) + geom_sf() + geom_jitter(data = dataGeoSample, aes(x = longitude,
y = latitude, color = lineage), width = 0.4, height = 0.4, size = 0.7) + coord_sf(xlim = c(xMin,
xMax), ylim = c(yMin, yMax)) + scale_color_manual(name = "", values = rainbow(4))tabProp <- prop.table(tab, margin = 2)
tabProp <- t(tabProp)
tabProp <- tabProp[-2, ]
countryLin <- as.data.frame(tabProp)
colnames(countryLin) <- c("ISO", "lineage", "Freq")
countryLin <- merge(countryLin, dataGeoCountry, by = "ISO")
countryLin$longitude[countryLin$ISO == "BY"] <- 30 # move Belarus plot
countryLin$ISO <- gsub("GB", "UK", countryLin$ISO) # move Belarus plot
# Plot map with circular pie charts
dataMap_proj <- st_transform(dataMap, 3035) # Europe projection
europe_bbox <- st_bbox(c(xmin = xMin, xmax = xMax, ymin = yMin, ymax = 70), crs = st_crs(4326)) # Crop to Europe
europe_bbox <- st_transform(st_as_sfc(europe_bbox), 3035)
dataMap_europe <- st_crop(dataMap_proj, europe_bbox)
# Convert and project pie centers
countryLin_sf <- st_as_sf(countryLin, coords = c("longitude", "latitude"), crs = 4326)
countryLin_proj <- st_transform(countryLin_sf, 3035)
coords <- st_coordinates(countryLin_proj)
countryLin_proj <- cbind(countryLin, coords)
# r: radius in meters (adjust size)
figMapClass <- ggplot() + geom_sf(data = dataMap_europe) + geom_arc_bar(data = countryLin_proj,
aes(x0 = X, y0 = Y, r0 = 0, r = 150000, amount = Freq, fill = lineage), stat = "pie") +
geom_text(data = countryLin_proj, aes(x = X, y = Y, label = ISO), vjust = 2.5) +
scale_fill_manual(values = rainbow(4)) + coord_sf(expand = FALSE) + theme(legend.position = "none",
axis.title.x = element_blank(), axis.title.y = element_blank())
figMapClass# Figure 4
ggarrange(figAdm, figMapClass, labels = c("a", "b"), font.label = list(size = 12,
face = "bold"), ncol = 1, nrow = 2, heights = c(0.5, 1.5))Changes in time
dataId <- cbind(dataSample[, c("latitude", "longitude", "date", "country")], idM$MD2,
idM$LD)
dataId$sample <- row.names(dataSample)
dataPl <- dataId[dataId$country == "Poland", ]
tmp <- str_split_fixed(dataPl$sample, "-", 2)
tmp <- as.numeric(tmp[, 2])
dataPl$sampleNr <- tmp
dataPl$years <- ifelse(dataPl$date < 2016, "before 2016", "after 2016")
dataPl$years <- factor(dataPl$years, levels = c("before 2016", "after 2016"))
dataPl$population <- ifelse((dataPl$sampleNr > 253 & dataPl$sampleNr < 680), "protected",
"unprotected")
dataPl$population <- as.factor(dataPl$population)
table(dataPl$population, dataPl$years)
before 2016 after 2016
protected 315 91
unprotected 241 253
fit <- lm(M ~ years * population, data = dataPl)
anova(fit)Analysis of Variance Table
Response: M
Df Sum Sq Mean Sq F value Pr(>F)
years 1 1027777 1027777 652.855 < 2.2e-16 ***
population 1 65352 65352 41.512 1.91e-10 ***
years:population 1 311219 311219 197.690 < 2.2e-16 ***
Residuals 896 1410556 1574
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emm_date_by_pop <- emmeans(fit, ~years | population)
pairs(emm_date_by_pop, adjust = "tukey")population = protected:
contrast estimate SE df t.ratio p.value
before 2016 - after 2016 -11.2 4.72 896 -2.367 0.0181
population = unprotected:
contrast estimate SE df t.ratio p.value
before 2016 - after 2016 -94.4 3.57 896 -26.438 <.0001
emm_pop_by_date <- emmeans(fit, ~population | years)
pairs(emm_pop_by_date, adjust = "tukey")years = before 2016:
contrast estimate SE df t.ratio p.value
protected - unprotected 9.46 3.40 896 2.786 0.0054
years = after 2016:
contrast estimate SE df t.ratio p.value
protected - unprotected -73.78 4.85 896 -15.213 <.0001
# Figure 5
ggplot(dataPl, aes(x = population, y = M)) + geom_boxplot(aes(fill = years)) + scale_fill_brewer(palette = "BuPu") +
labs(y = "distance to lineage M")References
Machlowska, J., Gerula, D., Oleksa, A., Kok, H., McCormack, G., Valentine, A., … Tofilski, A. (2026). Fore wings of honey bees (apis mellifera) from northwestern europe. https://doi.org/10.5281/zenodo.17612437
Nawrocka, A., Kandemir, İ., Fuchs, S., & Tofilski, A. (2018a). Computer software for identification of honey bee subspecies and evolutionary lineages. Apidologie, 49(2), 172184. https://doi.org/10.1007/s13592-017-0538-y
Nawrocka, A., Kandemir, İ., Fuchs, S., & Tofilski, A. (2018b). Dataset: Computer software for identification of honey bee subspecies and evolutionary lineages. Apidologie, 49, 172184. https://doi.org/10.5281/zenodo.7498540
Oleksa, A., Căuia, E., Siceanu, A., Puškadija, Z., Kovačić, M., Pinto, M. A., … Tofilski, A. (2022). Collection of wing images for conservation of honey bees (Apis mellifera) biodiversity in Europe. Zenodo. https://doi.org/10.5281/zenodo.7244070
Tofilski, A. (2025). Measurements of honey bee (apis mellifera) wings originally provided by masaquiza et al. 2023. https://doi.org/10.5281/zenodo.17621032
Information about session
sessionInfo()R version 4.5.0 (2025-04-11 ucrt)
Platform: x86_64-w64-mingw32/x64
Running under: Windows 11 x64 (build 26200)
Matrix products: default
LAPACK version 3.12.1
locale:
[1] LC_COLLATE=en_US.UTF-8 LC_CTYPE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8
[4] LC_NUMERIC=C LC_TIME=en_US.UTF-8
time zone: Europe/Warsaw
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] ggforce_0.5.0 ggpubr_0.6.2 sf_1.0-21
[4] terra_1.8-80 rnaturalearth_1.1.0 ggplot2_4.0.0
[7] emmeans_2.0.0 stringr_1.6.0 reshape2_1.4.4
[10] IdentiFlyR_0.1.1 Morpho_2.13 geomorph_4.0.10
[13] Matrix_1.7-3 rgl_1.3.24 RRPP_2.1.2
loaded via a namespace (and not attached):
[1] DBI_1.2.3 gridExtra_2.3 formatR_1.14
[4] sandwich_3.1-1 rlang_1.1.6 magrittr_2.0.4
[7] multcomp_1.4-29 e1071_1.7-16 compiler_4.5.0
[10] vctrs_0.6.5 pkgconfig_2.0.3 fastmap_1.2.0
[13] backports_1.5.0 labeling_0.4.3 rmarkdown_2.30
[16] purrr_1.2.0 xfun_0.54.1 Rvcg_0.25
[19] jsonlite_2.0.0 tweenr_2.0.3 jpeg_0.1-11
[22] broom_1.0.10 parallel_4.5.0 R6_2.6.1
[25] stringi_1.8.7 RColorBrewer_1.1-3 bezier_1.1.2
[28] car_3.1-3 estimability_1.5.1 Rcpp_1.1.0
[31] iterators_1.0.14 knitr_1.50 zoo_1.8-14
[34] base64enc_0.1-3 splines_4.5.0 tidyselect_1.2.1
[37] rstudioapi_0.17.1 abind_1.4-8 yaml_2.3.10
[40] doParallel_1.0.17 codetools_0.2-20 minpack.lm_1.2-4
[43] curl_7.0.0 lattice_0.22-6 tibble_3.3.0
[46] plyr_1.8.9 withr_3.0.2 S7_0.2.0
[49] coda_0.19-4.1 evaluate_1.0.5 survival_3.8-3
[52] rnaturalearthdata_1.0.0 units_1.0-0 proxy_0.4-27
[55] polyclip_1.10-7 fitdistrplus_1.2-4 xml2_1.4.1
[58] mclust_6.1.2 pillar_1.11.1 carData_3.0-5
[61] KernSmooth_2.23-26 foreach_1.5.2 generics_0.1.4
[64] colorRamps_2.3.4 scales_1.4.0 xtable_1.8-4
[67] class_7.3-23 glue_1.8.0 scatterplot3d_0.3-44
[70] tools_4.5.0 ggsignif_0.6.4 mvtnorm_1.3-3
[73] cowplot_1.2.0 grid_4.5.0 tidyr_1.3.1
[76] ape_5.8-1 nlme_3.1-168 Formula_1.2-5
[79] cli_3.6.5 dplyr_1.1.4 gtable_0.3.6
[82] shapes_1.2.8 rstatix_0.7.3 digest_0.6.37
[85] classInt_0.4-11 ggrepel_0.9.6 TH.data_1.1-4
[88] htmlwidgets_1.6.4 farver_2.1.2 htmltools_0.5.8.1
[91] lifecycle_1.0.4 MASS_7.3-65