001    package org.jaga.exampleApplications.proteinLocation;
002    
003    import org.jaga.individualRepresentation.proteinLocation.*;
004    import org.jaga.selection.*;
005    
006    /**
007     * TODO: Complete these comments.
008     *
009     * <p><u>Project:</u> JAGA - Java API for Genetic Algorithms.</p>
010     *
011     * <p><u>Company:</u> University College London and JAGA.Org
012     *    (<a href="http://www.jaga.org" target="_blank">http://www.jaga.org</a>).
013     * </p>
014     *
015     * <p><u>Copyright:</u> (c) 2004 by G. Paperin.<br/>
016     *    This program is free software; you can redistribute it and/or modify
017     *    it under the terms of the GNU General Public License as published by
018     *    the Free Software Foundation, ONLY if you include a note of the original
019     *    author(s) in any redistributed/modified copy.<br/>
020     *    This program is distributed in the hope that it will be useful,
021     *    but WITHOUT ANY WARRANTY; without even the implied warranty of
022     *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
023     *    GNU General Public License for more details.<br/>
024     *    You should have received a copy of the GNU General Public License
025     *    along with this program; if not, write to the Free Software
026     *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
027     *    or see http://www.gnu.org/licenses/gpl.html</p>
028     *
029     * @author Greg Paperin (greg@jaga.org)
030     *
031     * @version JAGA public release 1.0 beta
032     */
033    
034    public class Locator {
035    
036            private static final int minOverlap = 3;
037    
038            private static final int A = 0;
039            private static final int B = 1;
040            private static final int C = 2;
041            private static final int D = 3;
042            private static final String [] typeNames = new String [] {"Cytosol",
043                                                                                                                              "Nucleus",
044                                                                                                                              "Mitochondrion",
045                                                                                                                              "Extracellular"};
046    
047            private ProteinLocationClassifier [] classifiers = new ProteinLocationClassifier[D+1];
048            private int totalClassifications[] = new int[D+1];
049    
050            public Locator() {}
051    
052            public void setClassifiers(ProteinLocationClassifier cytosol,
053                                                               ProteinLocationClassifier nucleus,
054                                                               ProteinLocationClassifier mitochond,
055                                                               ProteinLocationClassifier extracell) {
056                    this.classifiers[A] = cytosol;
057                    this.classifiers[B] = nucleus;
058                    this.classifiers[C] = mitochond;
059                    this.classifiers[D] = extracell;
060            }
061    
062            private double sensitivity(int i) {
063                    return ((ClassifierFitness) classifiers[i].getFitness()).getSensitivity();
064            }
065    
066            private double specificity(int i) {
067                    return ((ClassifierFitness) classifiers[i].getFitness()).getSpecificity();
068            }
069    
070            private int analyseResults(Protein prot, boolean [] match) {
071    
072                    // Get the basic probability assignments:
073                    double [] m = new double[]{0, 0, 0, 0};
074                    double [] mNOT = new double[]{0, 0, 0, 0};
075                    double [] doubt = new double[]{0, 0, 0, 0};
076                    for (int i = A; i <= D; i++) {
077                            if (match[i]) {
078                                    m[i] = sensitivity(i);
079                                    doubt[i] = 1.0 - m[i];
080                            } else {
081                                    mNOT[i] = sensitivity(i);
082                                    doubt[i] = 1.0 - mNOT[i];
083                            }
084                    }
085    
086                    // Get combined probability assignments:
087    
088                    /*
089                    double cmEmpty = m[A] * m[B] + m[A] * m[C] + m[A] * m[D]
090                                               + m[B] * m[C] + m[B] * m[D]
091                                               + m[C] * m[D];
092    
093                    double norm = 1.0 - cmEmpty;
094    
095                    double cmA = m[A] * mNOT[B]  + m[A] * mNOT[C]  + m[A] * mNOT[D]
096                                       + m[A] * doubt[B] + m[A] * doubt[C] + m[A] * doubt[D];
097                    cmA /= norm;
098    
099                    double cmB = m[B] * mNOT[A]  + m[B] * mNOT[C]  + m[B] * mNOT[D]
100                                       + m[B] * doubt[A] + m[B] * doubt[C] + m[B] * doubt[D];
101                    cmB /= norm;
102    
103                    double cmC = m[C] * mNOT[A]  + m[C] * mNOT[B]  + m[C] * mNOT[D]
104                                       + m[C] * doubt[A] + m[C] * doubt[B] + m[C] * doubt[D];
105                    cmC /= norm;
106    
107                    double cmD = m[D] * mNOT[A]  + m[D] * mNOT[B]  + m[D] * mNOT[C]
108                                       + m[D] * doubt[A] + m[D] * doubt[B] + m[D] * doubt[C];
109                    cmD /= norm;
110    
111                    double cmAB = (mNOT[C] * mNOT[B]) / norm;
112                    double cmAC = (mNOT[B] * mNOT[D]) / norm;
113                    double cmAD = (mNOT[B] * mNOT[C]) / norm;
114                    double cmBC = (mNOT[A] * mNOT[D]) / norm;
115                    double cmBD = (mNOT[A] * mNOT[C]) / norm;
116                    double cmCD = (mNOT[A] * mNOT[B]) / norm;
117    
118                    double cmABC = (mNOT[D] * doubt[A] + mNOT[D] * doubt[B] + mNOT[D] * doubt[C]) / norm;
119                    double cmABD = (mNOT[C] * doubt[A] + mNOT[C] * doubt[B] + mNOT[C] * doubt[D]) / norm;
120                    double cmACD = (mNOT[B] * doubt[A] + mNOT[B] * doubt[C] + mNOT[B] * doubt[D]) / norm;
121                    double cmBCD = (mNOT[A] * doubt[B] + mNOT[A] * doubt[C] + mNOT[A] * doubt[D]) / norm;
122    
123                    double cmABCD = doubt[A] * doubt[B] + doubt[A] * doubt[C] + doubt[A] * doubt[D]
124                                              + doubt[B] * doubt[C] + doubt[B] * doubt[D] + doubt[C] * doubt[D];
125                    cmABCD /= norm;
126    
127                    double checkSum = cmA + cmB + cmC + cmD
128                                                    + cmAB + cmAC + cmAD + cmBC + cmBD + cmCD
129                                                    + cmABC + cmABD + cmACD + cmBCD
130                                                    + cmABCD;
131                    */
132                    double cmEmpty = m[A]    * m[B]    * m[C]    * m[D]
133                                               + mNOT[A] * mNOT[B] * mNOT[C] * mNOT[D];
134    
135                    double norm = 1.0 - cmEmpty;
136                    //double norm = 1.0;
137    
138                    double cmA = m[A] * mNOT[B]  * mNOT[C]  * mNOT[D]
139                                       + m[A] * mNOT[B]  * mNOT[C]  * doubt[D]
140                                       + m[A] * mNOT[B]  * doubt[C] * mNOT[D]
141                                       + m[A] * doubt[B] * mNOT[C]  * mNOT[D]
142                                       + m[A] * mNOT[B]  * doubt[C] * doubt[D]
143                                       + m[A] * doubt[B] * mNOT[C]  * doubt[D]
144                                       + m[A] * doubt[B] * doubt[C] * mNOT[D]
145                                       + m[A] * doubt[B] * doubt[C] * doubt[D]
146                                       + doubt[A] * mNOT[B] * mNOT[C] * mNOT[D];
147                    cmA /= norm;
148    
149                    double cmB = m[B] * mNOT[A]  * mNOT[C]  * mNOT[D]
150                                       + m[B] * mNOT[A]  * mNOT[C]  * doubt[D]
151                                       + m[B] * mNOT[A]  * doubt[C] * mNOT[D]
152                                       + m[B] * doubt[A] * mNOT[C]  * mNOT[D]
153                                       + m[B] * mNOT[A]  * doubt[C] * doubt[D]
154                                       + m[B] * doubt[A] * mNOT[C]  * doubt[D]
155                                       + m[B] * doubt[A] * doubt[C] * mNOT[D]
156                                       + m[B] * doubt[A] * doubt[C] * doubt[D]
157                                       + doubt[B] * mNOT[A] * mNOT[C] * mNOT[D];
158                    cmB /= norm;
159    
160                    double cmC = m[C] * mNOT[B]  * mNOT[A]  * mNOT[D]
161                                       + m[C] * mNOT[B]  * mNOT[A]  * doubt[D]
162                                       + m[C] * mNOT[B]  * doubt[A] * mNOT[D]
163                                       + m[C] * doubt[B] * mNOT[A]  * mNOT[D]
164                                       + m[C] * mNOT[B]  * doubt[A] * doubt[D]
165                                       + m[C] * doubt[B] * mNOT[A]  * doubt[D]
166                                       + m[C] * doubt[B] * doubt[A] * mNOT[D]
167                                       + m[C] * doubt[B] * doubt[A] * doubt[D]
168                                       + doubt[C] * mNOT[B] * mNOT[A] * mNOT[D];
169                    cmC /= norm;
170    
171                    double cmD = m[D] * mNOT[B]  * mNOT[C]  * mNOT[A]
172                                       + m[D] * mNOT[B]  * mNOT[C]  * doubt[A]
173                                       + m[D] * mNOT[B]  * doubt[C] * mNOT[A]
174                                       + m[D] * doubt[B] * mNOT[C]  * mNOT[A]
175                                       + m[D] * mNOT[B]  * doubt[C] * doubt[A]
176                                       + m[D] * doubt[B] * mNOT[C]  * doubt[A]
177                                       + m[D] * doubt[B] * doubt[C] * mNOT[A]
178                                       + m[D] * doubt[B] * doubt[C] * doubt[A]
179                                       + doubt[D] * mNOT[B] * mNOT[C] * mNOT[A];
180                    cmD /= norm;
181    
182    
183                    double cmAB = (doubt[A] * doubt[B] * mNOT[C] * mNOT[B]) / norm;
184                    double cmAC = (doubt[A] * doubt[C] * mNOT[B] * mNOT[D]) / norm;
185                    double cmAD = (doubt[A] * doubt[D] * mNOT[B] * mNOT[C]) / norm;
186                    double cmBC = (doubt[B] * doubt[C] * mNOT[A] * mNOT[D]) / norm;
187                    double cmBD = (doubt[B] * doubt[D] * mNOT[A] * mNOT[C]) / norm;
188                    double cmCD = (doubt[C] * doubt[D] * mNOT[A] * mNOT[B]) / norm;
189    
190                    double cmABC = (mNOT[D] * doubt[A] * doubt[B] * doubt[C]) / norm;
191                    double cmABD = (mNOT[C] * doubt[A] * doubt[B] * doubt[D]) / norm;
192                    double cmACD = (mNOT[B] * doubt[A] * doubt[C] * doubt[D]) / norm;
193                    double cmBCD = (mNOT[A] * doubt[B] * doubt[C] * doubt[D]) / norm;
194    
195                    double cmABCD = (doubt[A] * doubt[B] * doubt[C] * doubt[D]) / norm;
196    
197                    // Get belief and plausibilitiy:
198    
199                    double [] belief = new double[]{cmA, cmB, cmC, cmD};
200                    double [] plaus = new double[]{cmBCD, cmACD, cmABD, cmABC};
201    
202    
203                    // Print results:
204                    int overallClass = -1;
205                    double topcConf = -1;
206                    System.out.println("\nClassification of protein " + prot.getName() + ":");
207                    for (int i = A; i <= D; i++) {
208                            double conf = belief[i] - plaus[i];
209                            System.out.println("    " + typeNames[i]
210                                                             + "?\tBelief = " + belief[i]
211                                                             + ";\tPlausibility = " + plaus[i] + ".");
212                            if (conf > topcConf) {
213                                    overallClass = i;
214                                    topcConf = conf;
215                            }
216                    }
217                    if (overallClass < 0)
218                            System.out.println("    DESISION: Cannot localise protein.");
219                    else
220                            System.out.println("    DESISION: (" + ((char) ('A' + overallClass)) + ") "
221                                                               + typeNames[overallClass]
222                                                               + " (confidence: " + topcConf
223                                                               + " evience for: " + belief[overallClass]
224                                                               + " evidence against: " + plaus[overallClass] + ")");
225                    return overallClass;
226            }
227    
228            private boolean align(Protein protein, ProteinLocationClassifier classif) {
229    
230                    final PolypeptidePattern pattern = classif.getPattern();
231                    final int patLen = pattern.getLength();
232    
233                    int pStart = -(patLen - minOverlap);
234                    if (pStart > 0)
235                            pStart = 0;
236                    final int pStop = protein.getLength() - minOverlap;
237    
238                    for (int p = pStart; p <= pStop; p++) {
239                            if (pattern.matchesPerformanceHack(protein.getSequenceReferencePerformanceHack(), p))
240                                    return true;
241                    }
242                    return false;
243            }
244    
245            private int classifyProtein(Protein prot) {
246                    if (null == classifiers || 0 == classifiers.length) {
247                            System.out.println("No classifiers available");
248                            return -1;
249                    }
250    
251                    boolean [] match = new boolean[classifiers.length];
252                    for (int i = A; i <= D; i++) {
253                            match[i] = align(prot, classifiers[i]);
254                    }
255    
256                    return analyseResults(prot, match);
257            }
258    
259            public void exec(String fastaFile) {
260                    SimplifiedFastaFileParser parser = new SimplifiedFastaFileParser();
261                    ProteinGroup group = new ProteinGroup("Unknown", parser, fastaFile);
262                    for (int i = 0; i < group.size(); i++) {
263                            final Protein prot = group.getProtein(i);
264                            final int classification = classifyProtein(prot);
265                            if (classification > -1)
266                                    totalClassifications[classification]++;
267                    }
268                    System.out.println("--------------------------------------------------------------------------------");
269                    System.out.println("TOTALS:");
270                    for (int i = A; i <= D; i++)
271                            System.out.println("Total " + typeNames[i] + ": " + totalClassifications[i]);
272                    System.out.println("TOTAL PROTEINS: " + group.size());
273    
274            }
275    
276            public static void main(String[] unusedArgs) {
277                    Locator locator = new Locator();
278                    locator.exec("D:/Courseworks/4C58/cw/data/Unk.fasta");
279            }
280    
281    }