salmonanalysis.R

Jake Heare — Mar 3, 2014, 1:39 PM

### R script for Fish 444 pop structure lab
### Charlie Waters 1/30/2014

#########################################
# 1. Set up R session
#########################################


#B. Load the installed packages in to R for use
#Again, hit Ctrl+Enter for each line separately
#Or use R studio panel

library(ape)  
library(ade4)
library(adegenet)
   ==========================
    adegenet 1.3-9.2 is loaded
   ==========================

 - to start, type '?adegenet'
 - to browse adegenet website, type 'adegenetWeb()'
 - to post questions/comments: adegenet-forum@lists.r-forge.r-project.org
library(diveRsity)
library(doParallel)
Loading required package: foreach
Loading required package: iterators
Loading required package: parallel
library(foreach)
library(genetics)
Loading required package: combinat

Attaching package: 'combinat'

The following object is masked from 'package:utils':

    combn

Loading required package: gdata
gdata: Unable to locate valid perl interpreter
gdata: 
gdata: read.xls() will be unable to read Excel XLS and XLSX files
gdata: unless the 'perl=' argument is used to specify the location
gdata: of a valid perl intrpreter.
gdata: 
gdata: (To avoid display of this message in the future, please
gdata: ensure perl is installed and available on the executable
gdata: search path.)
gdata: Unable to load perl libaries needed by read.xls()
gdata: to support 'XLX' (Excel 97-2004) files.

gdata: Unable to load perl libaries needed by read.xls()
gdata: to support 'XLSX' (Excel 2007+) files.

gdata: Run the function 'installXLSXsupport()'
gdata: to automatically download and install the perl
gdata: libaries needed to support Excel XLS and XLSX formats.

Attaching package: 'gdata'

The following object is masked from 'package:stats':

    nobs

The following object is masked from 'package:utils':

    object.size

Loading required package: gtools
Loading required package: MASS
Loading required package: mvtnorm


NOTE: THIS PACKAGE IS NOW OBSOLETE.



  The R-Genetics project has developed an set of enhanced genetics

  packages to replace 'genetics'. Please visit the project homepage

  at http://rgenetics.org for informtion.




Attaching package: 'genetics'

The following objects are masked from 'package:base':

    %in%, as.factor, order
library(hierfstat)

Attaching package: 'hierfstat'

The following object is masked from 'package:genetics':

    allele.count

The following object is masked from 'package:adegenet':

    read.fstat

The following objects are masked from 'package:ape':

    pcoa, varcomp
library(iterators)
library(parallel)
library(sendplot)
Loading required package: rtiff
Loading required package: pixmap
library(xlsx)
Loading required package: rJava
Loading required package: xlsxjars
library(BDgraph)
Loading required package: huge
Loading required package: Matrix
Loading required package: lattice
Loading required package: igraph

Attaching package: 'igraph'

The following object is masked from 'package:ape':

    edges


Attaching package: 'BDgraph'

The following object is masked from 'package:igraph':

    compare

The following object is masked from 'package:MASS':

    select

# C. Set the working directory to a specific folder 
#just change my UWNetID to yours 
#and change the folder to the one you created on the Desktop

setwd("C:/Users/Christine Savolainen/Desktop/Bio Informatics/Con Gen") 

#D. import a Genepop file and saves it as "cod_data_genepop" 
#in the form of a "genind" object
salmon <- read.genepop("Class_data_genepop_new.gen", missing=NA)  

 Converting data from a Genepop .gen file to a genind object... 


File description:  Genepop file for FISH 444 Class project: 350 individuals genotyped at 200 RAD loci 

...done.

#E. Gives a summary of the data, 
#including number of individuals, alleles per locus, etc.
summary(salmon)  

 # Total number of genotypes:  350 

 # Population sample sizes:  
Founders1998_260_M      INT2002_434_M      SEG2002_432_M 
                50                 50                 50 
     INT2006_412_F      SEG2006_382_F     INT2010_8060_F 
                50                 50                 50 
    SEG2010_9162_J 
                50 

 # Number of alleles per locus:  
L001 L002 L003 L004 L005 L006 L007 L008 L009 L010 L011 L012 L013 L014 L015 
   2    2    2    2    2    2    2    2    2    2    2    2    2    2    2 
L016 L017 L018 L019 L020 L021 L022 L023 L024 L025 L026 L027 L028 L029 L030 
   2    2    2    2    2    2    2    2    2    2    2    2    2    2    2 
L031 L032 L033 L034 L035 L036 L037 L038 L039 L040 L041 L042 L043 L044 L045 
   2    2    2    2    2    2    2    2    2    2    2    2    2    2    2 
L046 L047 L048 L049 L050 L051 L052 L053 L054 L055 L056 L057 L058 L059 L060 
   2    2    2    2    2    2    2    2    2    2    2    2    2    2    2 
L061 L062 L063 L064 L065 L066 L067 L068 L069 L070 L071 L072 L073 L074 L075 
   2    2    2    2    2    2    2    2    2    2    2    2    2    2    2 
L076 L077 L078 L079 L080 L081 L082 L083 L084 L085 L086 L087 L088 L089 L090 
   2    2    2    2    2    2    2    2    2    2    2    2    2    2    2 
L091 L092 L093 L094 L095 L096 L097 L098 L099 L100 L101 L102 L103 L104 L105 
   2    2    2    2    2    2    2    2    2    2    2    2    2    2    2 
L106 L107 L108 L109 L110 L111 L112 L113 L114 L115 L116 L117 L118 L119 L120 
   2    2    2    2    2    2    2    2    2    2    2    2    2    2    2 
L121 L122 L123 L124 L125 L126 L127 L128 L129 L130 L131 L132 L133 L134 L135 
   2    2    2    2    2    2    2    2    2    2    2    2    2    2    2 
L136 L137 L138 L139 L140 L141 L142 L143 L144 L145 L146 L147 L148 L149 L150 
   2    2    2    2    2    2    2    2    2    2    2    2    2    2    2 
L151 L152 L153 L154 L155 L156 L157 L158 L159 L160 L161 L162 L163 L164 L165 
   2    2    2    2    2    2    2    2    2    2    2    2    2    2    2 
L166 L167 L168 L169 L170 L171 L172 L173 L174 L175 L176 L177 L178 L179 L180 
   2    2    2    2    2    2    2    2    2    2    2    2    2    2    2 
L181 L182 L183 L184 L185 L186 L187 L188 L189 L190 L191 L192 L193 L194 L195 
   2    2    2    2    2    2    2    2    2    2    2    2    2    2    2 
L196 L197 L198 L199 L200 
   2    2    2    2    2 

 # Number of alleles per population:  
  1   2   3   4   5   6   7 
398 398 400 399 398 396 392 

 # Percentage of missing data:  
[1] 17.45

 # Observed heterozygosity:  
   L001    L002    L003    L004    L005    L006    L007    L008    L009 
0.31104 0.26512 0.33984 0.33459 0.23397 0.37786 0.20000 0.32414 0.10135 
   L010    L011    L012    L013    L014    L015    L016    L017    L018 
0.49538 0.29480 0.26543 0.15541 0.23103 0.45017 0.20863 0.20345 0.36879 
   L019    L020    L021    L022    L023    L024    L025    L026    L027 
0.37000 0.46690 0.38801 0.20833 0.16475 0.18092 0.53468 0.36000 0.35275 
   L028    L029    L030    L031    L032    L033    L034    L035    L036 
0.46284 0.17627 0.37113 0.14876 0.35786 0.05034 0.24272 0.11963 0.14508 
   L037    L038    L039    L040    L041    L042    L043    L044    L045 
0.42633 0.44000 0.45607 0.35202 0.30000 0.23779 0.40467 0.47734 0.42222 
   L046    L047    L048    L049    L050    L051    L052    L053    L054 
0.15517 0.38966 0.19333 0.14430 0.43478 0.51807 0.07430 0.22082 0.25949 
   L055    L056    L057    L058    L059    L060    L061    L062    L063 
0.32292 0.50495 0.20275 0.17358 0.40000 0.31683 0.13571 0.27941 0.21805 
   L064    L065    L066    L067    L068    L069    L070    L071    L072 
0.09766 0.42612 0.44108 0.19512 0.43807 0.49091 0.10294 0.39322 0.47768 
   L073    L074    L075    L076    L077    L078    L079    L080    L081 
0.12303 0.07595 0.47312 0.40850 0.14953 0.10769 0.26277 0.32278 0.14094 
   L082    L083    L084    L085    L086    L087    L088    L089    L090 
0.43554 0.42800 0.37584 0.14493 0.46729 0.22857 0.29934 0.12963 0.45937 
   L091    L092    L093    L094    L095    L096    L097    L098    L099 
0.17699 0.41158 0.45139 0.07895 0.40000 0.44482 0.21705 0.19164 0.45455 
   L100    L101    L102    L103    L104    L105    L106    L107    L108 
0.36531 0.46835 0.65705 0.35017 0.10811 0.12615 0.14433 0.21605 0.08278 
   L109    L110    L111    L112    L113    L114    L115    L116    L117 
0.29553 0.30132 0.12195 0.21384 0.36299 0.20775 0.17264 0.27596 0.32517 
   L118    L119    L120    L121    L122    L123    L124    L125    L126 
0.21104 0.25000 0.40071 0.41837 0.15806 0.13110 0.31034 0.08986 0.29195 
   L127    L128    L129    L130    L131    L132    L133    L134    L135 
0.41199 0.25574 0.09350 0.10448 0.16508 0.11462 0.31930 0.37346 0.33670 
   L136    L137    L138    L139    L140    L141    L142    L143    L144 
0.31950 0.40426 0.30038 0.42763 0.42368 0.42182 0.15484 0.31928 0.34146 
   L145    L146    L147    L148    L149    L150    L151    L152    L153 
0.18323 0.21406 0.36280 0.09286 0.11838 0.19118 0.15909 0.28378 0.45614 
   L154    L155    L156    L157    L158    L159    L160    L161    L162 
0.12420 0.46230 0.18284 0.16667 0.10039 0.43519 0.18254 0.18750 0.05724 
   L163    L164    L165    L166    L167    L168    L169    L170    L171 
0.24691 0.43396 0.19142 0.27178 0.33546 0.20221 0.13505 0.34343 0.39310 
   L172    L173    L174    L175    L176    L177    L178    L179    L180 
0.38344 0.12287 0.10857 0.04221 0.24252 0.37037 0.04089 0.27508 0.10458 
   L181    L182    L183    L184    L185    L186    L187    L188    L189 
0.28959 0.44514 0.02715 0.42671 0.19856 0.28740 0.45946 0.31692 0.18391 
   L190    L191    L192    L193    L194    L195    L196    L197    L198 
0.23794 0.35000 0.39577 0.31529 0.13514 0.47260 0.36316 0.11679 0.38255 
   L199    L200 
0.16495 0.30508 

 # Expected heterozygosity:  
   L001    L002    L003    L004    L005    L006    L007    L008    L009 
0.34589 0.33368 0.45708 0.38959 0.28833 0.48892 0.25936 0.34397 0.16579 
   L010    L011    L012    L013    L014    L015    L016    L017    L018 
0.49727 0.47327 0.28593 0.18752 0.24525 0.49601 0.23076 0.22004 0.47885 
   L019    L020    L021    L022    L023    L024    L025    L026    L027 
0.43639 0.49897 0.46147 0.31043 0.18245 0.18576 0.39441 0.49060 0.38838 
   L028    L029    L030    L031    L032    L033    L034    L035    L036 
0.49301 0.20394 0.41355 0.17202 0.39958 0.07420 0.30291 0.16460 0.17751 
   L037    L038    L039    L040    L041    L042    L043    L044    L045 
0.49740 0.43520 0.49314 0.35645 0.35179 0.26157 0.49527 0.49375 0.40952 
   L046    L047    L048    L049    L050    L051    L052    L053    L054 
0.18549 0.46656 0.19580 0.18375 0.45369 0.50000 0.08293 0.23907 0.28337 
   L055    L056    L057    L058    L059    L060    L061    L062    L063 
0.32783 0.49861 0.30010 0.20078 0.49234 0.33700 0.16263 0.36754 0.41758 
   L064    L065    L066    L067    L068    L069    L070    L071    L072 
0.10686 0.46679 0.44103 0.21940 0.47364 0.49870 0.12359 0.42135 0.47201 
   L073    L074    L075    L076    L077    L078    L079    L080    L081 
0.14263 0.08468 0.48581 0.45578 0.17700 0.10737 0.33792 0.31541 0.18107 
   L082    L083    L084    L085    L086    L087    L088    L089    L090 
0.49706 0.49231 0.43910 0.17066 0.49825 0.24941 0.31881 0.13717 0.48523 
   L091    L092    L093    L094    L095    L096    L097    L098    L099 
0.18983 0.45335 0.47046 0.08786 0.49330 0.46597 0.27258 0.22201 0.47987 
   L100    L101    L102    L103    L104    L105    L106    L107    L108 
0.49670 0.49662 0.48664 0.41143 0.14901 0.13940 0.15706 0.25673 0.08541 
   L109    L110    L111    L112    L113    L114    L115    L116    L117 
0.32328 0.30417 0.14597 0.23387 0.43794 0.22921 0.22904 0.28691 0.42468 
   L118    L119    L120    L121    L122    L123    L124    L125    L126 
0.20893 0.25551 0.49139 0.44099 0.18770 0.15868 0.33725 0.09108 0.33536 
   L127    L128    L129    L130    L131    L132    L133    L134    L135 
0.49236 0.27412 0.13910 0.09902 0.18757 0.13555 0.30496 0.39426 0.35670 
   L136    L137    L138    L139    L140    L141    L142    L143    L144 
0.37187 0.45131 0.36234 0.43213 0.44650 0.49650 0.16433 0.38961 0.39262 
   L145    L146    L147    L148    L149    L150    L151    L152    L153 
0.17651 0.23469 0.36410 0.12651 0.12768 0.22424 0.17757 0.31919 0.49551 
   L154    L155    L156    L157    L158    L159    L160    L161    L162 
0.14923 0.49052 0.21875 0.21176 0.11592 0.45332 0.20975 0.33228 0.06193 
   L163    L164    L165    L166    L167    L168    L169    L170    L171 
0.25240 0.49929 0.20939 0.28773 0.34901 0.20479 0.14243 0.36028 0.41151 
   L172    L173    L174    L175    L176    L177    L178    L179    L180 
0.42882 0.13890 0.18000 0.04752 0.23289 0.41424 0.04716 0.28632 0.11647 
   L181    L182    L183    L184    L185    L186    L187    L188    L189 
0.44242 0.46450 0.04423 0.49641 0.21807 0.42094 0.49952 0.33973 0.22660 
   L190    L191    L192    L193    L194    L195    L196    L197    L198 
0.26985 0.39592 0.43859 0.39338 0.15465 0.49916 0.40913 0.49976 0.46397 
   L199    L200 
0.19575 0.40040 



######################################################################################################
# CODE FOR PART A - TEST WHETHER THE POPULATIONS DEVIATE FROM HARDY-WEINBERG EQUILIBRIUM
######################################################################################################

### Break up the entire data set into genind objects for each population
pop_labels <- c(rep("F1998",50),rep("I2002",50),
  rep("S2002",50),rep("I2006",50),rep("S2006",50),rep("I2010",50),
  rep("S2010",50) )
## Creates a vector containing the population assignments of each individual
#numbers in code are numbers of individuals in each population


## Creates a list of genind objects for each population
pops_separated <- seppop(salmon,pop=pop_labels)
names(pops_separated)
[1] "F1998" "I2002" "I2006" "I2010" "S2002" "S2006" "S2010"


#Creates a genind object comprising only the AD individuals
data_Founders1998 <-pops_separated$Founders1998 
#Verify that the genind object has the correct number of individuals and loci
data_Founders1998                      
NULL
###Repeat for all populations
data_Int2002 <-pops_separated$Int2002  
data_Seg2002 <-pops_separated$Seg2002
data_Int2006 <-pops_separated$Int2006
data_Seg2006 <-pops_separated$Seg2006
data_Int2010 <-pops_separated$Int2010
data_Seg2010 <-pops_separated$Seg2010


#### Compute observed and expected heterzygosity for 
#each population over all loci
summary_Founders1998 <- summary(data_Founders1998)
mean(summary_Founders1998$Hexp)
Error: error in evaluating the argument 'x' in selecting a method for function 'mean': Error in summary_Founders1998$Hexp : 
  $ operator is invalid for atomic vectors
mean(summary_Founders1998$Hobs)
Error: error in evaluating the argument 'x' in selecting a method for function 'mean': Error in summary_Founders1998$Hobs : 
  $ operator is invalid for atomic vectors
### Test whether Hobs and Hexp are significantly different
t.test(summary_Founders1998$Hobs,summary_Founders1998$Hexp,paired=TRUE)  
Error: $ operator is invalid for atomic vectors

summary_Int2002 <- summary(data_Int2002)
mean(summary_Int2002$Hexp)
Error: error in evaluating the argument 'x' in selecting a method for function 'mean': Error in summary_Int2002$Hexp : $ operator is invalid for atomic vectors
mean(summary_Int2002$Hobs)
Error: error in evaluating the argument 'x' in selecting a method for function 'mean': Error in summary_Int2002$Hobs : $ operator is invalid for atomic vectors
t.test(summary_Int2002$Hobs,summary_Int2002$Hexp,paired=TRUE)
Error: $ operator is invalid for atomic vectors

summary_Seg2002 <- summary(data_Seg2002)
mean(summary_Seg2002$Hexp)
Error: error in evaluating the argument 'x' in selecting a method for function 'mean': Error in summary_Seg2002$Hexp : $ operator is invalid for atomic vectors
mean(summary_Seg2002$Hobs)
Error: error in evaluating the argument 'x' in selecting a method for function 'mean': Error in summary_Seg2002$Hobs : $ operator is invalid for atomic vectors
t.test(summary_Seg2002$Hobs,summary_Seg2002$Hexp,paired=TRUE)
Error: $ operator is invalid for atomic vectors

summary_Int2006 <- summary(data_Int2006)
mean(summary_Int2006$Hexp)
Error: error in evaluating the argument 'x' in selecting a method for function 'mean': Error in summary_Int2006$Hexp : $ operator is invalid for atomic vectors
mean(summary_Int2006$Hobs)
Error: error in evaluating the argument 'x' in selecting a method for function 'mean': Error in summary_Int2006$Hobs : $ operator is invalid for atomic vectors
t.test(summary_Int2006$Hobs,summary_Int2006$Hexp,paired=TRUE)
Error: $ operator is invalid for atomic vectors

summary_Seg2006 <- summary(data_Seg2006)
mean(summary_Seg2006$Hexp)
Error: error in evaluating the argument 'x' in selecting a method for function 'mean': Error in summary_Seg2006$Hexp : $ operator is invalid for atomic vectors
mean(summary_Seg2006$Hobs)
Error: error in evaluating the argument 'x' in selecting a method for function 'mean': Error in summary_Seg2006$Hobs : $ operator is invalid for atomic vectors
t.test(summary_Seg2006$Hobs,summary_Seg2006$Hexp,paired=TRUE)
Error: $ operator is invalid for atomic vectors

summary_Int2010 <- summary(data_Int2010)
mean(summary_Int2010$Hexp)
Error: error in evaluating the argument 'x' in selecting a method for function 'mean': Error in summary_Int2010$Hexp : $ operator is invalid for atomic vectors
mean(summary_Int2010$Hobs)
Error: error in evaluating the argument 'x' in selecting a method for function 'mean': Error in summary_Int2010$Hobs : $ operator is invalid for atomic vectors
t.test(summary_Int2010$Hobs,summary_Int2010$Hexp,paired=TRUE)
Error: $ operator is invalid for atomic vectors

summary_Seg2010 <- summary(data_Seg2010)
mean(summary_Seg2010$Hexp)
Error: error in evaluating the argument 'x' in selecting a method for function 'mean': Error in summary_Seg2010$Hexp : $ operator is invalid for atomic vectors
mean(summary_Seg2010$Hobs)
Error: error in evaluating the argument 'x' in selecting a method for function 'mean': Error in summary_Seg2010$Hobs : $ operator is invalid for atomic vectors
t.test(summary_Seg2010$Hobs,summary_Seg2010$Hexp,paired=TRUE)
Error: $ operator is invalid for atomic vectors

##################################################################################################################################
### QUESTION:
### ARE THERE ANY POPULATIONS THAT SIGNIFICANTLY DEVIATE FROM HWE? IF SO, WHICH ONES?
##################################################################################################################################



### Now test for deviations from HWE at each locus within each population

#This test uses simulation to compute a pvalue for HWE
HWE_test_results <- HWE.test(salmon,pop=NULL,permut=TRUE,
  nsim=10000,res.type="matrix") 

#We want to correct for multiple tests, even though the Bonferroni is conservative
corrected_pval <- 0.05/(11*11) 
corrected_pval
[1] 0.0004132
#Creates a table of True/False for loci that are out of HWE (TRUE=out of HWE)
HWE_logical <- HWE_test_results<corrected_pval  
#Identify which loci are out of HWE in the various populations
HWE_logical   
   X12340 X21612 X00212 X00279 X00571 X00873 X01197 X01425 X01503 X01559
P1  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P2  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P3  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P4  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE   TRUE  FALSE
P5  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P6  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P7  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE     NA  FALSE
   X01585 X01737 X01809 X01925 X02287 X02407 X02570 X02849 X03281 X03293
P1  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P2  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P3  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P4  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P5  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P6  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P7  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
   X03779 X04657 X04964 X05185 X05291 X06389 X06437 X06655 X07321 X07406
P1  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P2  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P3  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P4  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P5  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P6  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P7  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
   X07867 X08112 X08504 X08541 X08734 X09244 X09343 X09888 X10039 X10545
P1  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P2  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P3  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P4  FALSE  FALSE  FALSE  FALSE   TRUE  FALSE  FALSE  FALSE  FALSE  FALSE
P5  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P6  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P7     NA  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
   X10589 X10789 X11021 X11088 X11328 X11377 X11930 X11970 X12622 X12843
P1  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P2  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P3  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P4  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P5  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P6  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P7  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
   X12859 X13013 X13024 X13271 X13752 X13886 X14006 X14181 X14545 X14760
P1  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P2  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P3  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P4  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P5  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P6  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P7  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
   X15462 X15813 X15926 X16017 X16186 X16535 X16783 X16920 X17077 X17179
P1  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P2  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P3  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P4  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P5  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P6  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P7  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
   X17189 X17397 X17794 X17960 X18067 X19148 X19299 X19408 X19893 X19899
P1  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P2  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P3  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P4  FALSE  FALSE  FALSE     NA  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P5  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P6  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P7  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
   X19910 X20190 X20876 X21229 X21359 X21640 X21917 X22070 X22625 X22694
P1  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P2  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P3  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P4  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P5  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P6  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P7  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
   X22730 X23027 X23461 X24163 X24786 X25336 X25635 X25692 X25878 X25936
P1  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P2  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P3  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P4  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P5  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P6  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P7  FALSE  FALSE  FALSE     NA  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
   X26213 X26218 X26243 X26426 X26993 X27106 X27342 X27572 X28224 X28293
P1  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P2  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P3  FALSE   TRUE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P4  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P5  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P6  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P7  FALSE  FALSE  FALSE     NA     NA  FALSE  FALSE  FALSE  FALSE  FALSE
   X28488 X28645 X29061 X29172 X29400 X29472 X29635 X30038 X30106 X30957
P1  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P2  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P3  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P4  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P5  FALSE  FALSE  FALSE  FALSE   TRUE  FALSE  FALSE  FALSE  FALSE  FALSE
P6  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P7  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
   X31310 X32370 X32453 X32715 X32745 X32862 X32976 X33228 X33271 X33462
P1  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P2  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE     NA  FALSE
P3  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P4  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P5  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P6  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE     NA  FALSE
P7  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
   X33946 X34030 X34124 X34321 X34663 X35058 X35634 X35741 X35959 X35980
P1  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P2  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P3  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P4  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P5  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P6  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P7  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
   X36059 X36117 X36158 X36360 X36658 X36885 X37007 X38049 X38060 X38093
P1  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P2  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P3  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P4  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P5  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P6  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P7  FALSE     NA  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
   X38282 X38910 X38984 X39090 X39130 X39768 X40801 X41089 X41554 X41785
P1  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P2  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P3  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P4  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P5  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P6  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE     NA  FALSE  FALSE
P7  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
   X41899 X42382 X42426 X42448 X42931 X43220 X43261 X43380 X44126 X44886
P1  FALSE     NA  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P2  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P3  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P4  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P5  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P6  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P7  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
   X44899 X45019 X45117 X46335 X46574 X47388 X47719 X48155 X48868 X49164
P1  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P2  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P3  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P4  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P5  FALSE  FALSE  FALSE  FALSE     NA  FALSE  FALSE     NA  FALSE  FALSE
P6  FALSE  FALSE  FALSE  FALSE     NA  FALSE  FALSE  FALSE  FALSE  FALSE
P7  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE     NA  FALSE     NA
   X49385 X49562 X49729 X50357 X51026 X51617 X52897 X52903 X53265 X53439
P1  FALSE  FALSE     NA  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P2  FALSE  FALSE     NA  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P3  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P4  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P5  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P6  FALSE  FALSE     NA  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P7  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
   X54650 X54750 X54822 X55255 X55485 X55507 X56310 X56673 X00010 X00021
P1  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P2  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE   TRUE  FALSE  FALSE  FALSE
P3  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P4  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE   TRUE  FALSE  FALSE  FALSE
P5  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P6  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE
P7  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE  FALSE


#################################################################################################################################
### QUESTION: 
### IS THERE ANY EVIDENCE FOR DEVIATIONS FROM HWE AT ANY OF THE LOCI? 
### EXPORT THE HW_TEST_RESULTS TABLE, BOLD THE LOCI.
write.table(HWE_test_results,file="HWresults.csv",sep=",",row.names=F)
##################################################################################################################################


######################################################################################################
# CODE FOR PART B - TEST FOR GENETIC DIFFERENTIATION BETWEEN PAIRS OF POPULATIONS 
######################################################################################################


#####Test to see if population has a significant effect on genetic structure
#(A test for homogeneity across all populations)

###converts data for use in package "hierfstat"
data_hierfstat <- genind2hierfstat(salmon)  

#Create a vector that assigns each individual (total 823) to one panmictic population
one_pop <- rep(1,350)
#Creates a vector of the actual population (or subpopulation) assignments 
levels <- data_hierfstat[,1]  
#Create a data frame only of locus data (formatted for hierfstat)
locus_data <- data_hierfstat[,2:201]  
#Permutes the populations and estimates G statistics 
testwithin <- test.within(locus_data,test.lev=levels,within=one_pop,nperm=1000) 
testwithin$p.val    
[1] 0.455

#################################################################################################################################
### QUESTION: 
###  Is there evidence that the samples comprise one single population?
##################################################################################################################################


######################################################################################################
# CODE FOR PART C - DERIVE GENETIC DISTANCES BETWEEN POPULATIONS (FST) 
######################################################################################################

#### Calculate pairwise Fst per population
##Function in package "diveRsity" that calculates pairwise W&C's Fst; 
#Samples individuals with replacement to create a new dataset and recalculates Fst 
#repeats 100 times to generate a mean and 95% CI's 

pop_stats <- fstOnly(infile="Class_data_genepop_FST.txt",outfile="Salmon_Pairwise_Fst",
  gp=3,bs_pairwise=TRUE,bootstraps=100,parallel=TRUE) 

### Open the output from the function above 
#and examine the Pairwise Fst values with confidence intervals

#################################################################################################################################
### QUESTION: 
### EXAMINE THE CONFIDENCE INTERVALS FOR ALL PAIRWISE COMPARISIONS. 
# WHICH POPULATION PAIRWISE COMPARISONS ARE SIGNIFICANTLY DIFFERENT FROM ZERO? 
#HINT: THEY ARE DIFFERENT IF THEIR CI'S DO NOT INCLUDE ZERO
write.table(pop_stats,file="FSTresults.csv",sep=",",row.names=T)
##################################################################################################################################

#### Before lab, I took the list of actual pairwise Fst values 
#and transformed them into a simple pairwise matrix (file=Fst.csv). 
#Import this matrix now. 
pairwise_fst <- read.csv("FSTresults.csv",header=TRUE,row.names=1)
pairwise_fst
                Fst.actual Fst.lower_CI Fst.upper_CI Fit.actual
Founde v 2002IN     0.0015       0.0013       0.0017    -0.5834
Founde v 2002SE     0.0028       0.0025       0.0031    -0.5807
Founde v 2006IN     0.0014       0.0012       0.0016    -0.5835
Founde v 2006SE     0.0083       0.0079       0.0087    -0.5792
Founde v 2010IN     0.0030       0.0028       0.0032    -0.5833
Founde v 2010SE     0.0163       0.0159       0.0167    -0.5838
2002IN v 2002SE     0.0000      -0.0002       0.0002    -0.5928
2002IN v 2006IN    -0.0002      -0.0004       0.0000    -0.5936
2002IN v 2006SE     0.0030       0.0028       0.0032    -0.5954
2002IN v 2010IN    -0.0003      -0.0005      -0.0001    -0.5961
2002IN v 2010SE     0.0089       0.0086       0.0092    -0.6040
2002SE v 2006IN    -0.0004      -0.0006      -0.0002    -0.5931
2002SE v 2006SE     0.0004       0.0002       0.0006    -0.5986
2002SE v 2010IN     0.0001      -0.0001       0.0003    -0.5946
2002SE v 2010SE     0.0057       0.0053       0.0061    -0.6083
2006IN v 2006SE     0.0022       0.0020       0.0024    -0.5963
2006IN v 2010IN    -0.0005      -0.0007      -0.0003    -0.5962
2006IN v 2010SE     0.0080       0.0078       0.0082    -0.6050
2006SE v 2010IN     0.0022       0.0020       0.0024    -0.5987
2006SE v 2010SE     0.0015       0.0013       0.0017    -0.6228
2010IN v 2010SE     0.0071       0.0068       0.0074    -0.6084
                Fit.lower_CI Fit.upper_CI
Founde v 2002IN      -0.5838      -0.5830
Founde v 2002SE      -0.5812      -0.5802
Founde v 2006IN      -0.5839      -0.5831
Founde v 2006SE      -0.5798      -0.5786
Founde v 2010IN      -0.5839      -0.5827
Founde v 2010SE      -0.5845      -0.5831
2002IN v 2002SE      -0.5933      -0.5923
2002IN v 2006IN      -0.5940      -0.5932
2002IN v 2006SE      -0.5959      -0.5949
2002IN v 2010IN      -0.5966      -0.5956
2002IN v 2010SE      -0.6046      -0.6034
2002SE v 2006IN      -0.5936      -0.5926
2002SE v 2006SE      -0.5993      -0.5979
2002SE v 2010IN      -0.5952      -0.5940
2002SE v 2010SE      -0.6092      -0.6074
2006IN v 2006SE      -0.5967      -0.5959
2006IN v 2010IN      -0.5967      -0.5957
2006IN v 2010SE      -0.6054      -0.6046
2006SE v 2010IN      -0.5992      -0.5982
2006SE v 2010SE      -0.6234      -0.6222
2010IN v 2010SE      -0.6090      -0.6078
fst <- as.dist(pairwise_fst) ## Convert pairwise_fst to a distance object
Warning: non-square matrix
fst  ### we'll use this matrix later in the analysis
                Founde v 2002IN Founde v 2002SE Founde v 2006IN
Founde v 2002SE          0.0028                                
Founde v 2006IN          0.0014          0.0012                
Founde v 2006SE          0.0083          0.0079          0.0087
Founde v 2010IN          0.0030          0.0028          0.0032
Founde v 2010SE          0.0163          0.0159          0.0167
2002IN v 2002SE          0.0000         -0.0002          0.0002
2002IN v 2006IN         -0.0002         -0.0004          0.0000
2002IN v 2006SE          0.0030          0.0028          0.0032
2002IN v 2010IN         -0.0003         -0.0005         -0.0001
2002IN v 2010SE          0.0089          0.0086          0.0092
2002SE v 2006IN         -0.0004         -0.0006         -0.0002
2002SE v 2006SE          0.0004          0.0002          0.0006
2002SE v 2010IN          0.0001         -0.0001          0.0003
2002SE v 2010SE          0.0057          0.0053          0.0061
2006IN v 2006SE          0.0022          0.0020          0.0024
2006IN v 2010IN         -0.0005         -0.0007         -0.0003
2006IN v 2010SE          0.0080          0.0078          0.0082
2006SE v 2010IN          0.0022          0.0020          0.0024
2006SE v 2010SE          0.0015          0.0013          0.0017
2010IN v 2010SE          0.0071          0.0068          0.0074
                Founde v 2006SE Founde v 2010IN Founde v 2010SE
Founde v 2002SE                                                
Founde v 2006IN                                                
Founde v 2006SE                                                
Founde v 2010IN         -0.5833                                
Founde v 2010SE         -0.5838         -0.5845                
2002IN v 2002SE         -0.5928         -0.5933         -0.5923
2002IN v 2006IN         -0.5936         -0.5940         -0.5932
2002IN v 2006SE         -0.5954         -0.5959         -0.5949
2002IN v 2010IN         -0.5961         -0.5966         -0.5956
2002IN v 2010SE         -0.6040         -0.6046         -0.6034
2002SE v 2006IN         -0.5931         -0.5936         -0.5926
2002SE v 2006SE         -0.5986         -0.5993         -0.5979
2002SE v 2010IN         -0.5946         -0.5952         -0.5940
2002SE v 2010SE         -0.6083         -0.6092         -0.6074
2006IN v 2006SE         -0.5963         -0.5967         -0.5959
2006IN v 2010IN         -0.5962         -0.5967         -0.5957
2006IN v 2010SE         -0.6050         -0.6054         -0.6046
2006SE v 2010IN         -0.5987         -0.5992         -0.5982
2006SE v 2010SE         -0.6228         -0.6234         -0.6222
2010IN v 2010SE         -0.6084         -0.6090         -0.6078
                2002IN v 2002SE 2002IN v 2006IN 2002IN v 2006SE
Founde v 2002SE                                                
Founde v 2006IN                                                
Founde v 2006SE                                                
Founde v 2010IN                                                
Founde v 2010SE                                                
2002IN v 2002SE                                                
2002IN v 2006IN          0.0028                                
2002IN v 2006SE          0.0014          0.0022                
2002IN v 2010IN          0.0083         -0.0005         -0.0005
2002IN v 2010SE          0.0030          0.0080          0.0086
2002SE v 2006IN          0.0163          0.0022         -0.0006
2002SE v 2006SE          0.0000          0.0015          0.0002
2002SE v 2010IN         -0.0002          0.0071         -0.0001
2002SE v 2010SE          0.0030          0.0012          0.0053
2006IN v 2006SE         -0.0003          0.0079          0.0020
2006IN v 2010IN          0.0089          0.0028         -0.0007
2006IN v 2010SE         -0.0004          0.0159          0.0078
2006SE v 2010IN          0.0004         -0.0002          0.0020
2006SE v 2010SE          0.0001         -0.0004          0.0013
2010IN v 2010SE          0.0057          0.0028          0.0068
                2002IN v 2010IN 2002IN v 2010SE 2002SE v 2006IN
Founde v 2002SE                                                
Founde v 2006IN                                                
Founde v 2006SE                                                
Founde v 2010IN                                                
Founde v 2010SE                                                
2002IN v 2002SE                                                
2002IN v 2006IN                                                
2002IN v 2006SE                                                
2002IN v 2010IN                                                
2002IN v 2010SE          0.0087                                
2002SE v 2006IN          0.0032          0.0061                
2002SE v 2006SE          0.0167          0.0024         -0.5936
2002SE v 2010IN          0.0002         -0.0003         -0.5954
2002SE v 2010SE          0.0000          0.0082         -0.5961
2006IN v 2006SE          0.0032          0.0024         -0.6040
2006IN v 2010IN         -0.0001          0.0017         -0.5931
2006IN v 2010SE          0.0092          0.0074         -0.5986
2006SE v 2010IN         -0.0002         -0.5833         -0.5946
2006SE v 2010SE          0.0006         -0.5838         -0.6083
2010IN v 2010SE          0.0003         -0.5928         -0.5963
                2002SE v 2006SE 2002SE v 2010IN 2002SE v 2010SE
Founde v 2002SE                                                
Founde v 2006IN                                                
Founde v 2006SE                                                
Founde v 2010IN                                                
Founde v 2010SE                                                
2002IN v 2002SE                                                
2002IN v 2006IN                                                
2002IN v 2006SE                                                
2002IN v 2010IN                                                
2002IN v 2010SE                                                
2002SE v 2006IN                                                
2002SE v 2006SE                                                
2002SE v 2010IN         -0.5962                                
2002SE v 2010SE         -0.6050         -0.5959                
2006IN v 2006SE         -0.5987         -0.5966         -0.5967
2006IN v 2010IN         -0.6228         -0.6046         -0.5967
2006IN v 2010SE         -0.6084         -0.5936         -0.6054
2006SE v 2010IN         -0.5845         -0.5993         -0.5992
2006SE v 2010SE         -0.5933         -0.5952         -0.6234
2010IN v 2010SE         -0.5940         -0.6092         -0.6090
                2006IN v 2006SE 2006IN v 2010IN 2006IN v 2010SE
Founde v 2002SE                                                
Founde v 2006IN                                                
Founde v 2006SE                                                
Founde v 2010IN                                                
Founde v 2010SE                                                
2002IN v 2002SE                                                
2002IN v 2006IN                                                
2002IN v 2006SE                                                
2002IN v 2010IN                                                
2002IN v 2010SE                                                
2002SE v 2006IN                                                
2002SE v 2006SE                                                
2002SE v 2010IN                                                
2002SE v 2010SE                                                
2006IN v 2006SE                                                
2006IN v 2010IN         -0.5923                                
2006IN v 2010SE         -0.5932         -0.5926                
2006SE v 2010IN         -0.5949         -0.5979         -0.5959
2006SE v 2010SE         -0.5956         -0.5940         -0.5957
2010IN v 2010SE         -0.6034         -0.6074         -0.6046
                2006SE v 2010IN 2006SE v 2010SE
Founde v 2002SE                                
Founde v 2006IN                                
Founde v 2006SE                                
Founde v 2010IN                                
Founde v 2010SE                                
2002IN v 2002SE                                
2002IN v 2006IN                                
2002IN v 2006SE                                
2002IN v 2010IN                                
2002IN v 2010SE                                
2002SE v 2006IN                                
2002SE v 2006SE                                
2002SE v 2010IN                                
2002SE v 2010SE                                
2006IN v 2006SE                                
2006IN v 2010IN                                
2006IN v 2010SE                                
2006SE v 2010IN                                
2006SE v 2010SE         -0.5982                
2010IN v 2010SE         -0.6222         -0.6078


######################################################################################################
# CODE FOR PART D - VISUALIZE THE DATA BY CONSTRUCTING A NEIGHBOR-JOINING TREE 
######################################################################################################

### Now let's make a phylogenetic tree!!
### We have to import actual genotypic data to construct 
#population distances and a phylogenetic tree
genotype_data <- read.csv("INT_SEGSS_genotypes.csv",header=TRUE, sep = ",",comment.char = "") 

### Remove the first column of the data file, 
#which contains the sample ID, so that the matrix only has genotypes
genotypes <- genotype_data[,2:201] 
## Creates a vector containing the population assignments of each individual
pop_labels <- c(rep("Founders1998",50),rep("Int2002",50),
                rep("Seg2002",50),rep("Int2006",50),rep("Seg2006",50),rep("Int2010",50),
                rep("Seg2010",50) ) 
 ## Converts labels to "factor"
pops <- as.factor(pop_labels) 

###Convert to format compatible for use with phylogenetic tree 
#packages in R; stores data as gene frequencies
genet_file <- char2genet(genotypes,pops,complete=TRUE)  

#### Computes Nei's distances between populations
Nei_dist <- dist.genet(genet_file,method=1)    
## View the distance matrix
Nei_dist 
        Founders1998  Int2002  Int2006  Int2010  Seg2002  Seg2006
Int2002     0.008515                                             
Int2006     0.009266 0.006689                                    
Int2010     0.011178 0.006148 0.006455                           
Seg2002     0.010537 0.006519 0.006407 0.006871                  
Seg2006     0.018523 0.011036 0.010442 0.010407 0.005838         
Seg2010     0.037075 0.023899 0.023570 0.021359 0.017109 0.009984
###Construct a Neighbor-joining tree from the distance matrix
tree <- nj(Nei_dist) 
### Plot the Neighbor-joining tree
plot.phylo(tree)  


#### To bootstrap the tree
## Write a function to bootstap the NJ tree
func <- function(x) nj(dist.genet(char2genet(x,pops)))  
bootstraps <- boot.phylo(tree,genotypes,func,B=100,block=1,rooted=TRUE)

  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=                                                                |   1%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==                                                               |   3%
  |                                                                       
  |===                                                              |   4%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |====                                                             |   6%
  |                                                                       
  |=====                                                            |   7%
  |                                                                       
  |=====                                                            |   8%
  |                                                                       
  |======                                                           |   9%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |=======                                                          |  11%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |========                                                         |  13%
  |                                                                       
  |=========                                                        |  14%
  |                                                                       
  |==========                                                       |  15%
  |                                                                       
  |==========                                                       |  16%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |============                                                     |  18%
  |                                                                       
  |============                                                     |  19%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |==============                                                   |  21%
  |                                                                       
  |==============                                                   |  22%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |================                                                 |  24%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  26%
  |                                                                       
  |==================                                               |  27%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |===================                                              |  29%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |====================                                             |  31%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |=====================                                            |  33%
  |                                                                       
  |======================                                           |  34%
  |                                                                       
  |=======================                                          |  35%
  |                                                                       
  |=======================                                          |  36%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |=========================                                        |  38%
  |                                                                       
  |=========================                                        |  39%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |===========================                                      |  41%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |============================                                     |  43%
  |                                                                       
  |=============================                                    |  44%
  |                                                                       
  |=============================                                    |  45%
  |                                                                       
  |==============================                                   |  46%
  |                                                                       
  |===============================                                  |  47%
  |                                                                       
  |===============================                                  |  48%
  |                                                                       
  |================================                                 |  49%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |=================================                                |  51%
  |                                                                       
  |==================================                               |  52%
  |                                                                       
  |==================================                               |  53%
  |                                                                       
  |===================================                              |  54%
  |                                                                       
  |====================================                             |  55%
  |                                                                       
  |====================================                             |  56%
  |                                                                       
  |=====================================                            |  57%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |======================================                           |  59%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |========================================                         |  61%
  |                                                                       
  |========================================                         |  62%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |==========================================                       |  64%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  66%
  |                                                                       
  |============================================                     |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |=============================================                    |  69%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |==============================================                   |  71%
  |                                                                       
  |===============================================                  |  72%
  |                                                                       
  |===============================================                  |  73%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |=================================================                |  76%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |===================================================              |  78%
  |                                                                       
  |===================================================              |  79%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |=====================================================            |  81%
  |                                                                       
  |=====================================================            |  82%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |=======================================================          |  84%
  |                                                                       
  |=======================================================          |  85%
  |                                                                       
  |========================================================         |  86%
  |                                                                       
  |=========================================================        |  87%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  89%
  |                                                                       
  |==========================================================       |  90%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |============================================================     |  92%
  |                                                                       
  |============================================================     |  93%
  |                                                                       
  |=============================================================    |  94%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |==============================================================   |  96%
  |                                                                       
  |===============================================================  |  97%
  |                                                                       
  |================================================================ |  98%
  |                                                                       
  |================================================================ |  99%
  |                                                                       
  |=================================================================| 100%

#### Plot your original tree
plot.phylo(tree)    
###Add the bootstrap values from 
#the 100 trees made from randomly sampling your data
nodelabels(bootstraps)  

plot of chunk unnamed-chunk-1

bootstraps
[1] 100  53  60 100 100


#################################################################################################################################
### SAVE THE IMAGE OF YOUR TREE FOR YOUR LAB WRITE-UP!!
##################################################################################################################################


###################################################################################################################################
# CODE FOR PART E - VISUALIZE THE DATA THROUGH MULTIDIMENSIONAL SCALING ANALYSIS (ALSO KNOWN AS A PRINCIPAL COORDINATES ANALYSIS)
##################################################################################################################################

######## Conduct a Principal Coordinates Analysis on the Nei_dist matrix generated previously

#### Conducts the PCoA; choose to retain 2 axes
pcoa<-dudi.pco(Nei_dist,scannf=FALSE,nf=2)   
Warning: Non euclidean distance
### Lists the eigenvalues for the axes; 
#represents the variation explained by each axis
pcoa$eig  
[1] 1.100e-04 4.805e-06 3.200e-06 1.767e-06
### Plots the populations on the first two axes
scatter(pcoa,xax=1,yax=2,clab.row=1,posieig="topright") 

plot of chunk unnamed-chunk-1


### This code yields the proportion of 
#total variation explained by the x-axis in your plot
variance_explained1<-pcoa$eig[1]/sum(pcoa$eig)   
variance_explained1 
[1] 0.9184
## This code yields the proportion of 
#total variation explained by the y-axis in your plot
variance_explained2<-pcoa$eig[2]/sum(pcoa$eig)   
variance_explained2
[1] 0.0401

#################################################################################################################################
### SAVE THE IMAGE OF YOUR TREE FOR YOUR LAB WRITE-UP!!

### QUESTION: HOW MUCH VARIATION DOES THE X-AXIs EXPLAIN? THE Y-AXIS? 
#What population relationships are described by these axes?
##################################################################################################################################


###################################################################################################################################
# CODE FOR PART F - VISUALIZE THE DATA: 
#THE RELATIONSHIP BETWEEN GENETIC AND GEOGRAPHIC DISTANCE
##################################################################################################################################

###Read in the matix of geographic distances
geo_dists <- read.csv("geographic_distances.csv",header=TRUE,row.names=1)  
Warning: cannot open file 'geographic_distances.csv': No such file or
directory
Error: cannot open the connection
##convert to a distance object
geo_dist_object <- as.dist(geo_dists)
Error: object 'geo_dists' not found
geo_dist_object
Error: object 'geo_dist_object' not found
par(mar=c(5,5,5,5))
plot(geo_dist_object,fst,type='p',pch=19,col="blue",
  main="Genetic vs. Geographic Distance", xlab="Geographic Distance (km)",
  ylab="Genetic Distance (W&C Fst)",cex.main=2.5,cex.lab=2,cex.axis=2)
Error: error in evaluating the argument 'x' in selecting a method for function 'plot': Error: object 'geo_dist_object' not found

### Fits a linear model to the data
relationship <- lm(fst~geo_dist_object)  
Error: object 'geo_dist_object' not found
### Get the results of the linear regression model
relationship   
Error: object 'relationship' not found
### Plot the regression line to the graph of genetic vs. geographic distance
abline(relationship,lwd=3,col="black")  
Error: object 'relationship' not found
### Gives the statistical results: the r-squared value and 
#the signficance of the explanatory variable
summary(relationship)    
Error: error in evaluating the argument 'object' in selecting a method for function 'summary': Error: object 'relationship' not found

#################################################################################################################################
### QUESTION: 
### WHAT IS THE R-SQUARED VALUE OF THE LINEAR MODEL?
### IS THERE A SIGNIFICANT RELATIONSHIP BETWEEN GENETIC DISTANCE 
#AND GEOGRAPHIC DISTANCE? 
##################################################################################################################################