001 package org.jaga.exampleApplications.proteinLocation;
002
003 import org.jaga.individualRepresentation.proteinLocation.*;
004 import org.jaga.definitions.*;
005 import org.jaga.fitnessEvaluation.proteinLocation.*;
006 import org.jaga.util.*;
007 import org.jaga.masterAlgorithm.*;
008 import org.jaga.reproduction.*;
009 import org.jaga.hooks.*;
010 import org.jaga.selection.*;
011
012 /**
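013 * Example application: for each of four protein locations (Cytosol, Nucleus, Mitochondrion, Extracellular) it runs a genetic algorithm seeded with hard-coded classifier pattern strings against the <code>*.valid.dat</code> protein groups, keeps the fittest classifier of each run, and hands the four classifiers to a <code>Locator</code> that is executed on a FASTA file.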
014 *
015 * <p><u>Project:</u> JAGA - Java API for Genetic Algorithms.</p>
016 *
017 * <p><u>Company:</u> University College London and JAGA.Org
018 * (<a href="http://www.jaga.org" target="_blank">http://www.jaga.org</a>).
019 * </p>
020 *
021 * <p><u>Copyright:</u> (c) 2004 by G. Paperin.<br/>
022 * This program is free software; you can redistribute it and/or modify
023 * it under the terms of the GNU General Public License as published by
024 * the Free Software Foundation, ONLY if you include a note of the original
025 * author(s) in any redistributed/modified copy.<br/>
026 * This program is distributed in the hope that it will be useful,
027 * but WITHOUT ANY WARRANTY; without even the implied warranty of
028 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
029 * GNU General Public License for more details.<br/>
030 * You should have received a copy of the GNU General Public License
031 * along with this program; if not, write to the Free Software
032 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
033 * or see http://www.gnu.org/licenses/gpl.html</p>
034 *
035 * @author Greg Paperin (greg@jaga.org)
036 *
037 * @version JAGA public release 1.0 beta
038 */
039
040 public class Validator {
041
042 public Validator() {}
043
044 public void exec() {
045
046 GAParameterSet params = new DefaultParameterSet();
047
048 SimplifiedFastaFileParser parser = new SimplifiedFastaFileParser();
049 ProteinGroup cytosol = new ProteinGroup("Cytosol", parser, "D:/Courseworks/4C58/cw/data/Cytosol.valid.dat");
050 ProteinGroup extracellular = new ProteinGroup("Extracellular", parser, "D:/Courseworks/4C58/cw/data/Extracellular.valid.dat");
051 ProteinGroup nucleus = new ProteinGroup("Nucleus", parser, "D:/Courseworks/4C58/cw/data/Nucleus.valid.dat");
052 ProteinGroup mitochondrion = new ProteinGroup("Mitochondrion", parser, "D:/Courseworks/4C58/cw/data/Mitochondrion.valid.dat");
053
054 params.setMaxGenerationNumber(1);
055 CombinedReproductionAlgorithm repAlg = new CombinedReproductionAlgorithm();
056 params.setReproductionAlgorithm(repAlg);
057 params.setSelectionAlgorithm(new TwoTournamentProbabalisticSelection(1.0));
058 ProteinLocationClassifierFactory factory = new ProteinLocationClassifierFactory();
059 params.setIndividualsFactory(factory);
060 AnalysisHook analysisHook = new AnalysisHook();
061 analysisHook.setLogStream(System.out);
062 analysisHook.setUpdateDelay(1500);
063
064 System.out.println("\n\n ** ** ******************** CYTOSOL: ******************** ** **");
065
066 ProteinGroup allPositives = new ProteinGroup();
067 allPositives.add(cytosol);
068 ProteinGroup allNegatives = new ProteinGroup();
069 allNegatives.add(mitochondrion);
070 allNegatives.add(extracellular);
071 allNegatives.add(nucleus);
072 params.setFitnessEvaluationAlgorithm(new ProteinLocationTrainer(allPositives, allNegatives, 3));
073 factory.setClassifierName("Cytosol");
074
075 Object [] classif = new Object [] {
076 "<Hydrophobic>-<Hydrophobic>-<Small>-[M]-[SAD]-<Polar>-<Polar>-<Aliphatic>-[KCGTP]-[NTQKR]-[FVCTHW]-[FCTHM]-<Small>-[VWFAMHDRSNQT]-[VWFAMHDRSNTE]-<Polar>-[ETHS]-[YETLH]-[LFH]-[DQPFLHTY]-[KGCISRW]-<Small>-<Hydrophobic>-<Polar>-<Small>-<Tiny>-<Tiny>-<Small>-[DGCPL]-[PCVSNR]-[CDVAKH]-[AQ]-[CWTI]-<Aliphatic>-<Negative>-<Small>-<Small>-<Small>-<Small>-<Small>-[HVFQSM]-[IRKEWQ]-[HNYDMG]-<Positive>-[LTKVI]-[DEF]-<Hydrophobic>-[RPLSECTVMFD]-[CDVNKH]-[CDVNKH]-[PCTV]-[CTAR]-<Polar>-<Polar>-<Small>-[CVPS]-[GTAVM]-[PNDVCL]-[DWHEC]-<Polar>-<Polar>-<Polar>-[STPNC]-[KDH]-[YWDENM]-[YWTLK]-<Small>-<Small>-[RPLYSECTVMI]-[ATVPL]-?-?-<Small>-?-<Hydrophobic>-?-[RTNCYMKISFPE]-[EDHW]-[EALKVNDH]-[MWEFKTVNGRCAYIP]-?",
077 "<Hydrophobic>-[DTRWSN]-<Polar>-<Hydrophobic>-<Small>-[VTCP]-[DA]-<Positive>-<Positive>-[SAG]-[DKPLS]-[NQEV]-<Small>-[NGCPT]-[DEF]-<Polar>-[ILVDPA]-[RHG]-[DQYM]-<Small>-<Hydrophobic>-[GQL]-<Hydrophobic>-[HQYMEW]-[HQYME]-<Hydrophobic>-[ASNFK]-<Polar>-<Hydrophobic>-[NQSLV]-<Hydrophobic>-<Hydrophobic>-<Tiny>-[YHGTP]-<Polar>-<Polar>-<Small>-<Aliphatic>-<Small>-<Small>-<Hydrophobic>-<Small>-<Tiny>-[YGTMC]-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-[IMN]-<Hydrophobic>-[SEC]-<Aromatic>-<Aromatic>-<Aromatic>-[KAFTIG]-<Small>-<Tiny>-[WSKDV]-<Polar>-<Hydrophobic>-[MLAQD]-<Aromatic>-W-[YWFHA]-<Small>-<Small>-[CILVGS]-[REDKG]-[HYCT]-<Tiny>-<Hydrophobic>-[STNYC]-<Negative>-<Negative>-<Hydrophobic>-[VLP]-[YHTD]-[EKM]-[PAY]-E-<Hydrophobic>-<Aliphatic>-<Aromatic>-<Aromatic>-[MKGWTL]-[ILVFKN]-[IWKTA]-[AIMYC]-<Hydrophobic>-<Hydrophobic>-[SCAMDFNRP]-[MAGTEHCP]-[MANYEFPHDT]-[ASDTGC]-[GSQICRVWDLE]-<Polar>-[SPGFQWD]-[SPVTALC]-?-?"
078 };
079 params.setPopulationSize(2);
080 InitialPopulationGA ga = new InitialPopulationGA(classif, params, 1.0, 0.0);
081 ga.addHook(analysisHook);
082
083 FittestIndividualResult result = (FittestIndividualResult) ((ReusableSimpleGA) ga).exec();
084 ProteinLocationClassifier cytosolClass = (ProteinLocationClassifier) result.getFittestIndividual();
085
086
087 System.out.println("\n\n ** ** ******************** NUCLEUS: ******************** ** **");
088
089 allPositives = new ProteinGroup();
090 allPositives.add(nucleus);
091 allNegatives = new ProteinGroup();
092 allNegatives.add(mitochondrion);
093 allNegatives.add(extracellular);
094 allNegatives.add(cytosol);
095 params.setFitnessEvaluationAlgorithm(new ProteinLocationTrainer(allPositives, allNegatives, 3));
096 factory.setClassifierName("Nucleus");
097
098 classif = new Object [] {
099 "[IVLN]-[GFVM]-[HEF]-<Hydrophobic>-[RKHT]-[WAV]-[VILP]-[NRQIWKDM]-[HWAKG]-<Small>-<Hydrophobic>-[GP]-[RQLNS]-[ADNGE]-<Small>-<Aliphatic>-[GSARE]-<Polar>-[CNTSF]-<Small>-<Aromatic>-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-R-[WNQDH]-[YHTG]-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-<Small>-[NDLI]-[NIVWKD]-<Hydrophobic>-[GYVHTK]-[WLHMTR]-<Polar>-<Small>-<Aliphatic>-<Hydrophobic>-<Polar>-<Negative>-[ADEIL]-<Hydrophobic>-<Hydrophobic>-[MKGTY]-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-<Aliphatic>-<Hydrophobic>-[EDC]-<Polar>-[NQCMVDTE]-[NQKRMVDIY]-[CSGKH]-<Small>-<Aliphatic>-[EDI]-[NVGPCY]-<Polar>-<Aliphatic>-[ADTGQ]-<Aromatic>-<Aliphatic>-<Hydrophobic>-[DPVY]-<Aromatic>-<Aromatic>-[KTYVIF]-<Hydrophobic>-<Aromatic>-[FHYWRS]-<Small>-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-[CADRMEL]-[PDMEGIHNT]-?-?-?-?-[NGDKAEVYRCFQ]-?-?-?-?-?-[TNDMKVAFSPGRCI]-[TCMLSERWIDAFGPVKQN]-?-[RLFNPKDEQHGSYTC]-?-[SILDMGQRYPTFNEHW]-[MIQSRPNFGDVHELW]-[ATVGHQPW]-[EDM]",
100 "[LHWVY]-<Hydrophobic>-<Aromatic>-[DWKQVL]-[NPDHE]-[RLE]-<Aromatic>-<Hydrophobic>-[PTVG]-<Hydrophobic>-[PCNGE]-[CEYRVQ]-<Hydrophobic>-<Hydrophobic>-<Small>-<Polar>-[QH]-[RNKQ]-[EYNKS]-[REKD]-<Hydrophobic>-[LWYGFM]-[ACMLHK]-<Negative>-[RTQ]-<Small>-<Small>-<Aromatic>-<Small>-<Aromatic>-[NGFP]-<Hydrophobic>-<Small>-<Small>-[KHN]-<Small>-[GLRMEW]-<Positive>-<Polar>-<Small>-[SGADC]-<Small>-<Small>-<Polar>-[CASW]-<Aliphatic>-[FCN]-[FCN]-[GPVA]-[ED]-[LGYMF]-[HFE]-<Small>-[WLMYQ]-P-<Hydrophobic>-[MWFHG]-[IVD]-<Aliphatic>-<Tiny>-[VITQ]-[FGCS]-<Small>-<Small>-[CSDAG]-<Small>-<Small>-<Polar>-<Hydrophobic>-<Small>-<Small>-<Aromatic>-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-[GASY]-[LYW]-<Polar>-[AYGW]-[EQKMDLC]-<Positive>-<Small>-<Small>-<Polar>-[DCPM]-<Aliphatic>-<Aliphatic>-<Positive>-[ACWL]-<Hydrophobic>-<Hydrophobic>-[GAMLSPVN]-[LGYMNPQKFSHV]-[MLNPQGVYRS]-<Small>-[MGDKPVQSA]-<Polar>-[PSDRMTCQ]-[VMHPSYNGFD]",
101 "[NSL]-<Aromatic>-[ACGTR]-<Small>-<Negative>-<Negative>-[IVLTHE]-<Polar>-<Tiny>-<Negative>-<Negative>-D-<Aromatic>-<Hydrophobic>-<Negative>-[LFAYERWD]-[LAYEC]-[GASH]-<Small>-<Polar>-<Hydrophobic>-[EMV]-[IC]-<Small>-[EDRV]-<Positive>-[FYW]-<Positive>-[QEKYD]-<Small>-<Polar>-[YQWTGV]-<Small>-[FGMLS]-[LWGAHT]-<Small>-<Small>-<Small>-<Aliphatic>-<Small>-<Small>-<Hydrophobic>-<Hydrophobic>-<Negative>-<Hydrophobic>-[KWQGNYLAEPS]-<Hydrophobic>-?-?-?-[MPISKYGWRNAFD]-?-?-?-?-?-?-?-[RNSHCWGEMPI]-?-?-?-?-?-[QAESPNR]-?-<Hydrophobic>-?-?-?-?-?-?-?-?-?-?-?-?-?-?-?-?-[TMIRVNFSHG]-?-?-?-[AMSRIQFVNGEDT]-[MAPYNTFSRIDHWV]-<Hydrophobic>-?-[SADGHLIMCKFNQRV]-[EGKLCMIHATW]-[MSATDPLKGYNEFCR]-[MSRNGAKQEWPHC]-[NMSGDTQVYKRW]-[NKLSADTYERPVFHW]-[NSKVPQDEY]-[NMSEGPKTCY]-[DERQYNHPM]"
102 };
103 params.setPopulationSize(3);
104 ga = new InitialPopulationGA(classif, params, 1.0, 0.0);
105 analysisHook.reset();
106 ga.addHook(analysisHook);
107
108 result = (FittestIndividualResult) ((ReusableSimpleGA) ga).exec();
109 ProteinLocationClassifier nucleusClass = (ProteinLocationClassifier) result.getFittestIndividual();
110
111
112 System.out.println("\n\n ** ** ******************** MITOCHONDTION: ******************** ** **");
113
114 allPositives = new ProteinGroup();
115 allPositives.add(mitochondrion);
116 allNegatives = new ProteinGroup();
117 allNegatives.add(nucleus);
118 allNegatives.add(extracellular);
119 allNegatives.add(cytosol);
120 params.setFitnessEvaluationAlgorithm(new ProteinLocationTrainer(allPositives, allNegatives, 3));
121 factory.setClassifierName("Mitochondrion");
122
123 classif = new Object [] {
124 "<Aliphatic>-<Small>-<Small>-[MHWT]-[QRDPL]-[WQRDP]-[TSCPK]-[TNAGD]-[TGPCI]-[KWYES]-[FYHW]-[FYHW]-[WY]-<Aromatic>-<Aromatic>-[MP]-<Tiny>-[GPDCTK]-<Tiny>-<Hydrophobic>-[RYVELC]-<Small>-<Small>-<Aromatic>-[YNHK]-[YNHK]-[DEVL]-[ILS]-<Small>-[SI]-[WYHA]-<Aliphatic>-<Positive>-<Small>-[SAPTH]-<Hydrophobic>-<Hydrophobic>-[TREYH]-[RKI]-[RKI]-[DQI]-[HKE]-[PHD]-<Small>-<Small>-<Hydrophobic>-<Polar>-[SNCDK]-<Aliphatic>-[LISWY]-<Tiny>-<Aliphatic>-<Small>-[KHRQ]-<Tiny>-[HCVGK]-<Hydrophobic>-<Aliphatic>-[KGFDL]-[VIP]-[NWQLEISGA]-<Tiny>-<Tiny>-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-[CNGVAM]-[LEQ]-<Positive>-<Hydrophobic>-<Hydrophobic>-[RHKWC]-[WKHS]-[AMWLT]-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-[WLAYF]-[LGCYSE]-[MHTIWN]-[CTPNY]-<Tiny>-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-<Small>-<Small>-<Hydrophobic>-[VKEFWLA]-<Hydrophobic>-<Hydrophobic>-[WEARNLDG]-<Hydrophobic>-[GLMRINDASQPY]-[LMREFINVAGT]-[FMLIYW]-[LRFI]-[SAGRPFWQKNITH]-[TRSCAKMYPV]"
125 };
126 params.setPopulationSize(1);
127 ga = new InitialPopulationGA(classif, params, 1.0, 0.0);
128 analysisHook.reset();
129 ga.addHook(analysisHook);
130
131 result = (FittestIndividualResult) ((ReusableSimpleGA) ga).exec();
132 ProteinLocationClassifier mitochondrionClass = (ProteinLocationClassifier) result.getFittestIndividual();
133
134 System.out.println("\n\n ** ** ******************** EXTRACELLULAR: ******************** ** **");
135
136 allPositives = new ProteinGroup();
137 allPositives.add(extracellular);
138 allNegatives = new ProteinGroup();
139 allNegatives.add(mitochondrion);
140 allNegatives.add(nucleus);
141 allNegatives.add(cytosol);
142 params.setFitnessEvaluationAlgorithm(new ProteinLocationTrainer(allPositives, allNegatives, 3));
143 factory.setClassifierName("Extracellular");
144
145 classif = new Object [] {
146 "[QWYTE]-[KLWF]-[MWGVP]-[PLT]-[PLT]-[ASGE]-<Negative>-<Hydrophobic>-[WICVDQ]-<Hydrophobic>-<Small>-<Small>-<Hydrophobic>-<Tiny>-[YSEN]-<Hydrophobic>-[LHMYID]-[LHMYID]-<Small>-<Small>-[VSTAW]-<Positive>-<Hydrophobic>-<Aromatic>-<Aromatic>-<Polar>-[WLTIMR]-[EKC]-<Small>-A-<Hydrophobic>-<Small>-<Small>-[RAL]-<Hydrophobic>-<Polar>-[LVIT]-[ASGTH]-[NKDYWF]-[WSTQ]-[WQHIPN]-[ESDW]-<Hydrophobic>-<Hydrophobic>-[LVFCHK]-[CYKAT]-<Hydrophobic>-[QPWYKR]-<Small>-[GSAV]-<Hydrophobic>-[ST]-[TCPA]-[VPDC]-[DNW]-[DNL]-[YEWTD]-G-<Small>-[TCL]-[HRW]-[EGV]-[DVP]-<Small>-<Small>-<Small>-[CHYR]-<Small>-[FYHW]-<Small>-[SQNYWL]-[KTDQE]-<Positive>-<Positive>-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-<Small>-<Small>-<Small>-<Hydrophobic>-[AWYIE]-[SGWN]-[YMVTSH]-[PTCNWGQDVHMI]-?-?-?-[MKRPSHWATFVLEQC]-?-?-?-?-?-?-[FLINSQVWC]-<Hydrophobic>-[LCFW]-[LI]-<Hydrophobic>"
147 };
148 params.setPopulationSize(1);
149 ga = new InitialPopulationGA(classif, params, 1.0, 0.0);
150 analysisHook.reset();
151 ga.addHook(analysisHook);
152
153 result = (FittestIndividualResult) ((ReusableSimpleGA) ga).exec();
154 ProteinLocationClassifier extracellularClass = (ProteinLocationClassifier) result.getFittestIndividual();
155
156
157 Locator locator = new Locator();
158 locator.setClassifiers(cytosolClass, nucleusClass,
159 mitochondrionClass, extracellularClass);
160 locator.exec("D:/Courseworks/4C58/cw/data/Unk.fasta");
161 //locator.exec("D:/Courseworks/4C58/cw/data/Cytosol.valid.dat");
162 //locator.exec("D:/Courseworks/4C58/cw/data/Nucleus.valid.dat");
163 //locator.exec("D:/Courseworks/4C58/cw/data/Mitochondrion.valid.dat");
164 //locator.exec("D:/Courseworks/4C58/cw/data/Extracellular.valid.dat");
165 //locator.exec("D:/Courseworks/4C58/cw/data/Cytosol.train.dat");
166 //locator.exec("D:/Courseworks/4C58/cw/data/Nucleus.train.dat");
167 //locator.exec("D:/Courseworks/4C58/cw/data/Mitochondrion.train.dat");
168 //locator.exec("D:/Courseworks/4C58/cw/data/Extracellular.train.dat");
169
170 }
171
172 public static void main(String[] unusedArgs) {
173 Validator validator = new Validator();
174 validator.exec();
175 }
176
177 }