001    package org.jaga.exampleApplications.proteinLocation;
002    
003    import org.jaga.individualRepresentation.proteinLocation.*;
004    import org.jaga.definitions.*;
005    import org.jaga.fitnessEvaluation.proteinLocation.*;
006    import org.jaga.util.*;
007    import org.jaga.masterAlgorithm.*;
008    import org.jaga.reproduction.*;
009    import org.jaga.hooks.*;
010    import org.jaga.selection.*;
011    
012    /**
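 * Example application: runs a one-generation GA over hard-coded seed patterns for four protein locations (evaluated on the *.valid.dat groups) and passes the fittest classifier of each run to a {@code Locator} that processes Unk.fasta.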
014     *
015     * <p><u>Project:</u> JAGA - Java API for Genetic Algorithms.</p>
016     *
017     * <p><u>Company:</u> University College London and JAGA.Org
018     *    (<a href="http://www.jaga.org" target="_blank">http://www.jaga.org</a>).
019     * </p>
020     *
021     * <p><u>Copyright:</u> (c) 2004 by G. Paperin.<br/>
022     *    This program is free software; you can redistribute it and/or modify
023     *    it under the terms of the GNU General Public License as published by
024     *    the Free Software Foundation, ONLY if you include a note of the original
025     *    author(s) in any redistributed/modified copy.<br/>
026     *    This program is distributed in the hope that it will be useful,
027     *    but WITHOUT ANY WARRANTY; without even the implied warranty of
028     *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
029     *    GNU General Public License for more details.<br/>
030     *    You should have received a copy of the GNU General Public License
031     *    along with this program; if not, write to the Free Software
032     *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
033     *    or see http://www.gnu.org/licenses/gpl.html</p>
034     *
035     * @author Greg Paperin (greg@jaga.org)
036     *
037     * @version JAGA public release 1.0 beta
038     */
039    
040    public class Validator {
041    
042            public Validator() {}
043    
044            public void exec() {
045    
046                    GAParameterSet params = new DefaultParameterSet();
047    
048                    SimplifiedFastaFileParser parser = new SimplifiedFastaFileParser();
049                    ProteinGroup cytosol = new ProteinGroup("Cytosol", parser, "D:/Courseworks/4C58/cw/data/Cytosol.valid.dat");
050                    ProteinGroup extracellular = new ProteinGroup("Extracellular", parser, "D:/Courseworks/4C58/cw/data/Extracellular.valid.dat");
051                    ProteinGroup nucleus = new ProteinGroup("Nucleus", parser, "D:/Courseworks/4C58/cw/data/Nucleus.valid.dat");
052                    ProteinGroup mitochondrion = new ProteinGroup("Mitochondrion", parser, "D:/Courseworks/4C58/cw/data/Mitochondrion.valid.dat");
053    
054                    params.setMaxGenerationNumber(1);
055                    CombinedReproductionAlgorithm repAlg = new CombinedReproductionAlgorithm();
056                    params.setReproductionAlgorithm(repAlg);
057                    params.setSelectionAlgorithm(new TwoTournamentProbabalisticSelection(1.0));
058                    ProteinLocationClassifierFactory factory = new ProteinLocationClassifierFactory();
059                    params.setIndividualsFactory(factory);
060                    AnalysisHook analysisHook = new AnalysisHook();
061                    analysisHook.setLogStream(System.out);
062                    analysisHook.setUpdateDelay(1500);
063    
064                    System.out.println("\n\n  **  **  ******************** CYTOSOL: ********************  **  **");
065    
066                    ProteinGroup allPositives = new ProteinGroup();
067                    allPositives.add(cytosol);
068                    ProteinGroup allNegatives = new ProteinGroup();
069                    allNegatives.add(mitochondrion);
070                    allNegatives.add(extracellular);
071                    allNegatives.add(nucleus);
072                    params.setFitnessEvaluationAlgorithm(new ProteinLocationTrainer(allPositives, allNegatives, 3));
073                    factory.setClassifierName("Cytosol");
074    
075                    Object [] classif = new Object [] {
076                            "<Hydrophobic>-<Hydrophobic>-<Small>-[M]-[SAD]-<Polar>-<Polar>-<Aliphatic>-[KCGTP]-[NTQKR]-[FVCTHW]-[FCTHM]-<Small>-[VWFAMHDRSNQT]-[VWFAMHDRSNTE]-<Polar>-[ETHS]-[YETLH]-[LFH]-[DQPFLHTY]-[KGCISRW]-<Small>-<Hydrophobic>-<Polar>-<Small>-<Tiny>-<Tiny>-<Small>-[DGCPL]-[PCVSNR]-[CDVAKH]-[AQ]-[CWTI]-<Aliphatic>-<Negative>-<Small>-<Small>-<Small>-<Small>-<Small>-[HVFQSM]-[IRKEWQ]-[HNYDMG]-<Positive>-[LTKVI]-[DEF]-<Hydrophobic>-[RPLSECTVMFD]-[CDVNKH]-[CDVNKH]-[PCTV]-[CTAR]-<Polar>-<Polar>-<Small>-[CVPS]-[GTAVM]-[PNDVCL]-[DWHEC]-<Polar>-<Polar>-<Polar>-[STPNC]-[KDH]-[YWDENM]-[YWTLK]-<Small>-<Small>-[RPLYSECTVMI]-[ATVPL]-?-?-<Small>-?-<Hydrophobic>-?-[RTNCYMKISFPE]-[EDHW]-[EALKVNDH]-[MWEFKTVNGRCAYIP]-?",
077                            "<Hydrophobic>-[DTRWSN]-<Polar>-<Hydrophobic>-<Small>-[VTCP]-[DA]-<Positive>-<Positive>-[SAG]-[DKPLS]-[NQEV]-<Small>-[NGCPT]-[DEF]-<Polar>-[ILVDPA]-[RHG]-[DQYM]-<Small>-<Hydrophobic>-[GQL]-<Hydrophobic>-[HQYMEW]-[HQYME]-<Hydrophobic>-[ASNFK]-<Polar>-<Hydrophobic>-[NQSLV]-<Hydrophobic>-<Hydrophobic>-<Tiny>-[YHGTP]-<Polar>-<Polar>-<Small>-<Aliphatic>-<Small>-<Small>-<Hydrophobic>-<Small>-<Tiny>-[YGTMC]-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-[IMN]-<Hydrophobic>-[SEC]-<Aromatic>-<Aromatic>-<Aromatic>-[KAFTIG]-<Small>-<Tiny>-[WSKDV]-<Polar>-<Hydrophobic>-[MLAQD]-<Aromatic>-W-[YWFHA]-<Small>-<Small>-[CILVGS]-[REDKG]-[HYCT]-<Tiny>-<Hydrophobic>-[STNYC]-<Negative>-<Negative>-<Hydrophobic>-[VLP]-[YHTD]-[EKM]-[PAY]-E-<Hydrophobic>-<Aliphatic>-<Aromatic>-<Aromatic>-[MKGWTL]-[ILVFKN]-[IWKTA]-[AIMYC]-<Hydrophobic>-<Hydrophobic>-[SCAMDFNRP]-[MAGTEHCP]-[MANYEFPHDT]-[ASDTGC]-[GSQICRVWDLE]-<Polar>-[SPGFQWD]-[SPVTALC]-?-?"
078                    };
079                    params.setPopulationSize(2);
080                    InitialPopulationGA ga = new InitialPopulationGA(classif, params, 1.0, 0.0);
081                    ga.addHook(analysisHook);
082    
083                    FittestIndividualResult result = (FittestIndividualResult) ((ReusableSimpleGA) ga).exec();
084                    ProteinLocationClassifier cytosolClass = (ProteinLocationClassifier) result.getFittestIndividual();
085    
086    
087                    System.out.println("\n\n  **  **  ******************** NUCLEUS: ********************  **  **");
088    
089                    allPositives = new ProteinGroup();
090                    allPositives.add(nucleus);
091                    allNegatives = new ProteinGroup();
092                    allNegatives.add(mitochondrion);
093                    allNegatives.add(extracellular);
094                    allNegatives.add(cytosol);
095                    params.setFitnessEvaluationAlgorithm(new ProteinLocationTrainer(allPositives, allNegatives, 3));
096                    factory.setClassifierName("Nucleus");
097    
098                    classif = new Object [] {
099                            "[IVLN]-[GFVM]-[HEF]-<Hydrophobic>-[RKHT]-[WAV]-[VILP]-[NRQIWKDM]-[HWAKG]-<Small>-<Hydrophobic>-[GP]-[RQLNS]-[ADNGE]-<Small>-<Aliphatic>-[GSARE]-<Polar>-[CNTSF]-<Small>-<Aromatic>-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-R-[WNQDH]-[YHTG]-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-<Small>-[NDLI]-[NIVWKD]-<Hydrophobic>-[GYVHTK]-[WLHMTR]-<Polar>-<Small>-<Aliphatic>-<Hydrophobic>-<Polar>-<Negative>-[ADEIL]-<Hydrophobic>-<Hydrophobic>-[MKGTY]-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-<Aliphatic>-<Hydrophobic>-[EDC]-<Polar>-[NQCMVDTE]-[NQKRMVDIY]-[CSGKH]-<Small>-<Aliphatic>-[EDI]-[NVGPCY]-<Polar>-<Aliphatic>-[ADTGQ]-<Aromatic>-<Aliphatic>-<Hydrophobic>-[DPVY]-<Aromatic>-<Aromatic>-[KTYVIF]-<Hydrophobic>-<Aromatic>-[FHYWRS]-<Small>-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-[CADRMEL]-[PDMEGIHNT]-?-?-?-?-[NGDKAEVYRCFQ]-?-?-?-?-?-[TNDMKVAFSPGRCI]-[TCMLSERWIDAFGPVKQN]-?-[RLFNPKDEQHGSYTC]-?-[SILDMGQRYPTFNEHW]-[MIQSRPNFGDVHELW]-[ATVGHQPW]-[EDM]",
100                            "[LHWVY]-<Hydrophobic>-<Aromatic>-[DWKQVL]-[NPDHE]-[RLE]-<Aromatic>-<Hydrophobic>-[PTVG]-<Hydrophobic>-[PCNGE]-[CEYRVQ]-<Hydrophobic>-<Hydrophobic>-<Small>-<Polar>-[QH]-[RNKQ]-[EYNKS]-[REKD]-<Hydrophobic>-[LWYGFM]-[ACMLHK]-<Negative>-[RTQ]-<Small>-<Small>-<Aromatic>-<Small>-<Aromatic>-[NGFP]-<Hydrophobic>-<Small>-<Small>-[KHN]-<Small>-[GLRMEW]-<Positive>-<Polar>-<Small>-[SGADC]-<Small>-<Small>-<Polar>-[CASW]-<Aliphatic>-[FCN]-[FCN]-[GPVA]-[ED]-[LGYMF]-[HFE]-<Small>-[WLMYQ]-P-<Hydrophobic>-[MWFHG]-[IVD]-<Aliphatic>-<Tiny>-[VITQ]-[FGCS]-<Small>-<Small>-[CSDAG]-<Small>-<Small>-<Polar>-<Hydrophobic>-<Small>-<Small>-<Aromatic>-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-[GASY]-[LYW]-<Polar>-[AYGW]-[EQKMDLC]-<Positive>-<Small>-<Small>-<Polar>-[DCPM]-<Aliphatic>-<Aliphatic>-<Positive>-[ACWL]-<Hydrophobic>-<Hydrophobic>-[GAMLSPVN]-[LGYMNPQKFSHV]-[MLNPQGVYRS]-<Small>-[MGDKPVQSA]-<Polar>-[PSDRMTCQ]-[VMHPSYNGFD]",
101                            "[NSL]-<Aromatic>-[ACGTR]-<Small>-<Negative>-<Negative>-[IVLTHE]-<Polar>-<Tiny>-<Negative>-<Negative>-D-<Aromatic>-<Hydrophobic>-<Negative>-[LFAYERWD]-[LAYEC]-[GASH]-<Small>-<Polar>-<Hydrophobic>-[EMV]-[IC]-<Small>-[EDRV]-<Positive>-[FYW]-<Positive>-[QEKYD]-<Small>-<Polar>-[YQWTGV]-<Small>-[FGMLS]-[LWGAHT]-<Small>-<Small>-<Small>-<Aliphatic>-<Small>-<Small>-<Hydrophobic>-<Hydrophobic>-<Negative>-<Hydrophobic>-[KWQGNYLAEPS]-<Hydrophobic>-?-?-?-[MPISKYGWRNAFD]-?-?-?-?-?-?-?-[RNSHCWGEMPI]-?-?-?-?-?-[QAESPNR]-?-<Hydrophobic>-?-?-?-?-?-?-?-?-?-?-?-?-?-?-?-?-[TMIRVNFSHG]-?-?-?-[AMSRIQFVNGEDT]-[MAPYNTFSRIDHWV]-<Hydrophobic>-?-[SADGHLIMCKFNQRV]-[EGKLCMIHATW]-[MSATDPLKGYNEFCR]-[MSRNGAKQEWPHC]-[NMSGDTQVYKRW]-[NKLSADTYERPVFHW]-[NSKVPQDEY]-[NMSEGPKTCY]-[DERQYNHPM]"
102                    };
103                    params.setPopulationSize(3);
104                    ga = new InitialPopulationGA(classif, params, 1.0, 0.0);
105                    analysisHook.reset();
106                    ga.addHook(analysisHook);
107    
108                    result = (FittestIndividualResult) ((ReusableSimpleGA) ga).exec();
109                    ProteinLocationClassifier nucleusClass = (ProteinLocationClassifier) result.getFittestIndividual();
110    
111    
112                    System.out.println("\n\n  **  **  ******************** MITOCHONDTION: ********************  **  **");
113    
114                    allPositives = new ProteinGroup();
115                    allPositives.add(mitochondrion);
116                    allNegatives = new ProteinGroup();
117                    allNegatives.add(nucleus);
118                    allNegatives.add(extracellular);
119                    allNegatives.add(cytosol);
120                    params.setFitnessEvaluationAlgorithm(new ProteinLocationTrainer(allPositives, allNegatives, 3));
121                    factory.setClassifierName("Mitochondrion");
122    
123                    classif = new Object [] {
124                            "<Aliphatic>-<Small>-<Small>-[MHWT]-[QRDPL]-[WQRDP]-[TSCPK]-[TNAGD]-[TGPCI]-[KWYES]-[FYHW]-[FYHW]-[WY]-<Aromatic>-<Aromatic>-[MP]-<Tiny>-[GPDCTK]-<Tiny>-<Hydrophobic>-[RYVELC]-<Small>-<Small>-<Aromatic>-[YNHK]-[YNHK]-[DEVL]-[ILS]-<Small>-[SI]-[WYHA]-<Aliphatic>-<Positive>-<Small>-[SAPTH]-<Hydrophobic>-<Hydrophobic>-[TREYH]-[RKI]-[RKI]-[DQI]-[HKE]-[PHD]-<Small>-<Small>-<Hydrophobic>-<Polar>-[SNCDK]-<Aliphatic>-[LISWY]-<Tiny>-<Aliphatic>-<Small>-[KHRQ]-<Tiny>-[HCVGK]-<Hydrophobic>-<Aliphatic>-[KGFDL]-[VIP]-[NWQLEISGA]-<Tiny>-<Tiny>-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-[CNGVAM]-[LEQ]-<Positive>-<Hydrophobic>-<Hydrophobic>-[RHKWC]-[WKHS]-[AMWLT]-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-[WLAYF]-[LGCYSE]-[MHTIWN]-[CTPNY]-<Tiny>-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-<Small>-<Small>-<Hydrophobic>-[VKEFWLA]-<Hydrophobic>-<Hydrophobic>-[WEARNLDG]-<Hydrophobic>-[GLMRINDASQPY]-[LMREFINVAGT]-[FMLIYW]-[LRFI]-[SAGRPFWQKNITH]-[TRSCAKMYPV]"
125                    };
126                    params.setPopulationSize(1);
127                    ga = new InitialPopulationGA(classif, params, 1.0, 0.0);
128                    analysisHook.reset();
129                    ga.addHook(analysisHook);
130    
131                    result = (FittestIndividualResult) ((ReusableSimpleGA) ga).exec();
132                    ProteinLocationClassifier mitochondrionClass = (ProteinLocationClassifier) result.getFittestIndividual();
133    
134                    System.out.println("\n\n  **  **  ******************** EXTRACELLULAR: ********************  **  **");
135    
136                    allPositives = new ProteinGroup();
137                    allPositives.add(extracellular);
138                    allNegatives = new ProteinGroup();
139                    allNegatives.add(mitochondrion);
140                    allNegatives.add(nucleus);
141                    allNegatives.add(cytosol);
142                    params.setFitnessEvaluationAlgorithm(new ProteinLocationTrainer(allPositives, allNegatives, 3));
143                    factory.setClassifierName("Extracellular");
144    
145                    classif = new Object [] {
146                            "[QWYTE]-[KLWF]-[MWGVP]-[PLT]-[PLT]-[ASGE]-<Negative>-<Hydrophobic>-[WICVDQ]-<Hydrophobic>-<Small>-<Small>-<Hydrophobic>-<Tiny>-[YSEN]-<Hydrophobic>-[LHMYID]-[LHMYID]-<Small>-<Small>-[VSTAW]-<Positive>-<Hydrophobic>-<Aromatic>-<Aromatic>-<Polar>-[WLTIMR]-[EKC]-<Small>-A-<Hydrophobic>-<Small>-<Small>-[RAL]-<Hydrophobic>-<Polar>-[LVIT]-[ASGTH]-[NKDYWF]-[WSTQ]-[WQHIPN]-[ESDW]-<Hydrophobic>-<Hydrophobic>-[LVFCHK]-[CYKAT]-<Hydrophobic>-[QPWYKR]-<Small>-[GSAV]-<Hydrophobic>-[ST]-[TCPA]-[VPDC]-[DNW]-[DNL]-[YEWTD]-G-<Small>-[TCL]-[HRW]-[EGV]-[DVP]-<Small>-<Small>-<Small>-[CHYR]-<Small>-[FYHW]-<Small>-[SQNYWL]-[KTDQE]-<Positive>-<Positive>-<Hydrophobic>-<Hydrophobic>-<Hydrophobic>-<Small>-<Small>-<Small>-<Hydrophobic>-[AWYIE]-[SGWN]-[YMVTSH]-[PTCNWGQDVHMI]-?-?-?-[MKRPSHWATFVLEQC]-?-?-?-?-?-?-[FLINSQVWC]-<Hydrophobic>-[LCFW]-[LI]-<Hydrophobic>"
147                    };
148                    params.setPopulationSize(1);
149                    ga = new InitialPopulationGA(classif, params, 1.0, 0.0);
150                    analysisHook.reset();
151                    ga.addHook(analysisHook);
152    
153                    result = (FittestIndividualResult) ((ReusableSimpleGA) ga).exec();
154                    ProteinLocationClassifier extracellularClass = (ProteinLocationClassifier) result.getFittestIndividual();
155    
156    
157                    Locator locator = new Locator();
158                    locator.setClassifiers(cytosolClass, nucleusClass,
159                                                               mitochondrionClass, extracellularClass);
160                    locator.exec("D:/Courseworks/4C58/cw/data/Unk.fasta");
161                    //locator.exec("D:/Courseworks/4C58/cw/data/Cytosol.valid.dat");
162                    //locator.exec("D:/Courseworks/4C58/cw/data/Nucleus.valid.dat");
163                    //locator.exec("D:/Courseworks/4C58/cw/data/Mitochondrion.valid.dat");
164                    //locator.exec("D:/Courseworks/4C58/cw/data/Extracellular.valid.dat");
165                    //locator.exec("D:/Courseworks/4C58/cw/data/Cytosol.train.dat");
166                    //locator.exec("D:/Courseworks/4C58/cw/data/Nucleus.train.dat");
167                    //locator.exec("D:/Courseworks/4C58/cw/data/Mitochondrion.train.dat");
168                    //locator.exec("D:/Courseworks/4C58/cw/data/Extracellular.train.dat");
169    
170            }
171    
172            public static void main(String[] unusedArgs) {
173                    Validator validator = new Validator();
174                    validator.exec();
175            }
176    
177    }