# Find genes that are annotated with multiple GO-slim biological-process terms
# and write their annotation rows to a tab-delimited text file.

# Read in file ----
filename <- "C:/Users/Sonia/Downloads/ID for DEG in QPX (trimmed).txt"
diff_gene_data <- read.delim(filename, stringsAsFactors = FALSE)

# Separate genes out by GO domain ----
# The rest of the script only works with proc_data (biological processes).
cell_data <- subset(diff_gene_data, GO.domain2 == "C")
mol_data <- subset(diff_gene_data, GO.domain2 == "F")
proc_data <- subset(diff_gene_data, GO.domain2 == "P")

# Grab genes with multiple GO-slim terms ----
genes <- unique(proc_data$Gene_ID)

# Number of distinct GO-slim terms per gene. which() drops NA comparisons, so
# a missing Gene_ID matches no rows and gets a count of 0.
n_slim_terms <- vapply(
  genes,
  function(gene) {
    gene_rows <- which(proc_data$Gene_ID == gene)
    length(unique(proc_data$GO.slim[gene_rows]))
  },
  integer(1)
)

# NOTE(review): "> 2" keeps only genes with three or more distinct GO-slim
# terms. If "multiple" is meant to be two or more, change this to "> 1".
multiple_paths <- genes[n_slim_terms > 2]

# Proteins with multiple GO-slim terms:
#   "P87027" "Q8VEH3" "Q5ZMA2" "Q5BJ90" "B2RX14" "Q6NVU2" "P62137"
#   "P83877" "Q6PQD5" "Q61749" "O22607" "Q09472" "Q5DU56"

# Grab other information for those genes ----
output_cols <- c(
  "Contig", "Gene_ID", "GO.slim", "GO.term", "GO.term.2", "Protein.function"
)

# Select every matching row in one step instead of rbind()-ing inside a loop.
# This also behaves correctly when zero or one gene qualifies, where a
# "2:length(x)" loop would count backwards and duplicate or mis-index rows.
keep_rows <- which(proc_data$Gene_ID %in% multiple_paths)

# Group rows gene by gene, in the order the genes appear in multiple_paths;
# order() leaves ties in their original order, so rows within a gene keep
# their file order.
keep_rows <- keep_rows[
  order(match(proc_data$Gene_ID[keep_rows], multiple_paths))
]
mult_proc <- proc_data[keep_rows, output_cols]

# Write the result ----
# write() only handles atomic vectors and matrices (it calls cat()), so it
# errors on a data frame, which is a list of columns. write.table() is the
# data-frame writer.
write.table(
  mult_proc,
  file = "multiple_Goslim_proteins.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)