001 package org.jaga.exampleApplications.proteinLocation;
002
003 import org.jaga.individualRepresentation.proteinLocation.*;
004 import org.jaga.selection.*;
005
006 /**
007 * TODO: Complete these comments.
008 *
009 * <p><u>Project:</u> JAGA - Java API for Genetic Algorithms.</p>
010 *
011 * <p><u>Company:</u> University College London and JAGA.Org
012 * (<a href="http://www.jaga.org" target="_blank">http://www.jaga.org</a>).
013 * </p>
014 *
015 * <p><u>Copyright:</u> (c) 2004 by G. Paperin.<br/>
016 * This program is free software; you can redistribute it and/or modify
017 * it under the terms of the GNU General Public License as published by
018 * the Free Software Foundation, ONLY if you include a note of the original
019 * author(s) in any redistributed/modified copy.<br/>
020 * This program is distributed in the hope that it will be useful,
021 * but WITHOUT ANY WARRANTY; without even the implied warranty of
022 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
023 * GNU General Public License for more details.<br/>
024 * You should have received a copy of the GNU General Public License
025 * along with this program; if not, write to the Free Software
026 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
027 * or see http://www.gnu.org/licenses/gpl.html</p>
028 *
029 * @author Greg Paperin (greg@jaga.org)
030 *
031 * @version JAGA public release 1.0 beta
032 */
033
034 public class Locator {
035
036 private static final int minOverlap = 3;
037
038 private static final int A = 0;
039 private static final int B = 1;
040 private static final int C = 2;
041 private static final int D = 3;
042 private static final String [] typeNames = new String [] {"Cytosol",
043 "Nucleus",
044 "Mitochondrion",
045 "Extracellular"};
046
047 private ProteinLocationClassifier [] classifiers = new ProteinLocationClassifier[D+1];
048 private int totalClassifications[] = new int[D+1];
049
050 public Locator() {}
051
052 public void setClassifiers(ProteinLocationClassifier cytosol,
053 ProteinLocationClassifier nucleus,
054 ProteinLocationClassifier mitochond,
055 ProteinLocationClassifier extracell) {
056 this.classifiers[A] = cytosol;
057 this.classifiers[B] = nucleus;
058 this.classifiers[C] = mitochond;
059 this.classifiers[D] = extracell;
060 }
061
062 private double sensitivity(int i) {
063 return ((ClassifierFitness) classifiers[i].getFitness()).getSensitivity();
064 }
065
066 private double specificity(int i) {
067 return ((ClassifierFitness) classifiers[i].getFitness()).getSpecificity();
068 }
069
070 private int analyseResults(Protein prot, boolean [] match) {
071
072 // Get the basic probability assignments:
073 double [] m = new double[]{0, 0, 0, 0};
074 double [] mNOT = new double[]{0, 0, 0, 0};
075 double [] doubt = new double[]{0, 0, 0, 0};
076 for (int i = A; i <= D; i++) {
077 if (match[i]) {
078 m[i] = sensitivity(i);
079 doubt[i] = 1.0 - m[i];
080 } else {
081 mNOT[i] = sensitivity(i);
082 doubt[i] = 1.0 - mNOT[i];
083 }
084 }
085
086 // Get combined probability assignments:
087
088 /*
089 double cmEmpty = m[A] * m[B] + m[A] * m[C] + m[A] * m[D]
090 + m[B] * m[C] + m[B] * m[D]
091 + m[C] * m[D];
092
093 double norm = 1.0 - cmEmpty;
094
095 double cmA = m[A] * mNOT[B] + m[A] * mNOT[C] + m[A] * mNOT[D]
096 + m[A] * doubt[B] + m[A] * doubt[C] + m[A] * doubt[D];
097 cmA /= norm;
098
099 double cmB = m[B] * mNOT[A] + m[B] * mNOT[C] + m[B] * mNOT[D]
100 + m[B] * doubt[A] + m[B] * doubt[C] + m[B] * doubt[D];
101 cmB /= norm;
102
103 double cmC = m[C] * mNOT[A] + m[C] * mNOT[B] + m[C] * mNOT[D]
104 + m[C] * doubt[A] + m[C] * doubt[B] + m[C] * doubt[D];
105 cmC /= norm;
106
107 double cmD = m[D] * mNOT[A] + m[D] * mNOT[B] + m[D] * mNOT[C]
108 + m[D] * doubt[A] + m[D] * doubt[B] + m[D] * doubt[C];
109 cmD /= norm;
110
111 double cmAB = (mNOT[C] * mNOT[B]) / norm;
112 double cmAC = (mNOT[B] * mNOT[D]) / norm;
113 double cmAD = (mNOT[B] * mNOT[C]) / norm;
114 double cmBC = (mNOT[A] * mNOT[D]) / norm;
115 double cmBD = (mNOT[A] * mNOT[C]) / norm;
116 double cmCD = (mNOT[A] * mNOT[B]) / norm;
117
118 double cmABC = (mNOT[D] * doubt[A] + mNOT[D] * doubt[B] + mNOT[D] * doubt[C]) / norm;
119 double cmABD = (mNOT[C] * doubt[A] + mNOT[C] * doubt[B] + mNOT[C] * doubt[D]) / norm;
120 double cmACD = (mNOT[B] * doubt[A] + mNOT[B] * doubt[C] + mNOT[B] * doubt[D]) / norm;
121 double cmBCD = (mNOT[A] * doubt[B] + mNOT[A] * doubt[C] + mNOT[A] * doubt[D]) / norm;
122
123 double cmABCD = doubt[A] * doubt[B] + doubt[A] * doubt[C] + doubt[A] * doubt[D]
124 + doubt[B] * doubt[C] + doubt[B] * doubt[D] + doubt[C] * doubt[D];
125 cmABCD /= norm;
126
127 double checkSum = cmA + cmB + cmC + cmD
128 + cmAB + cmAC + cmAD + cmBC + cmBD + cmCD
129 + cmABC + cmABD + cmACD + cmBCD
130 + cmABCD;
131 */
132 double cmEmpty = m[A] * m[B] * m[C] * m[D]
133 + mNOT[A] * mNOT[B] * mNOT[C] * mNOT[D];
134
135 double norm = 1.0 - cmEmpty;
136 //double norm = 1.0;
137
138 double cmA = m[A] * mNOT[B] * mNOT[C] * mNOT[D]
139 + m[A] * mNOT[B] * mNOT[C] * doubt[D]
140 + m[A] * mNOT[B] * doubt[C] * mNOT[D]
141 + m[A] * doubt[B] * mNOT[C] * mNOT[D]
142 + m[A] * mNOT[B] * doubt[C] * doubt[D]
143 + m[A] * doubt[B] * mNOT[C] * doubt[D]
144 + m[A] * doubt[B] * doubt[C] * mNOT[D]
145 + m[A] * doubt[B] * doubt[C] * doubt[D]
146 + doubt[A] * mNOT[B] * mNOT[C] * mNOT[D];
147 cmA /= norm;
148
149 double cmB = m[B] * mNOT[A] * mNOT[C] * mNOT[D]
150 + m[B] * mNOT[A] * mNOT[C] * doubt[D]
151 + m[B] * mNOT[A] * doubt[C] * mNOT[D]
152 + m[B] * doubt[A] * mNOT[C] * mNOT[D]
153 + m[B] * mNOT[A] * doubt[C] * doubt[D]
154 + m[B] * doubt[A] * mNOT[C] * doubt[D]
155 + m[B] * doubt[A] * doubt[C] * mNOT[D]
156 + m[B] * doubt[A] * doubt[C] * doubt[D]
157 + doubt[B] * mNOT[A] * mNOT[C] * mNOT[D];
158 cmB /= norm;
159
160 double cmC = m[C] * mNOT[B] * mNOT[A] * mNOT[D]
161 + m[C] * mNOT[B] * mNOT[A] * doubt[D]
162 + m[C] * mNOT[B] * doubt[A] * mNOT[D]
163 + m[C] * doubt[B] * mNOT[A] * mNOT[D]
164 + m[C] * mNOT[B] * doubt[A] * doubt[D]
165 + m[C] * doubt[B] * mNOT[A] * doubt[D]
166 + m[C] * doubt[B] * doubt[A] * mNOT[D]
167 + m[C] * doubt[B] * doubt[A] * doubt[D]
168 + doubt[C] * mNOT[B] * mNOT[A] * mNOT[D];
169 cmC /= norm;
170
171 double cmD = m[D] * mNOT[B] * mNOT[C] * mNOT[A]
172 + m[D] * mNOT[B] * mNOT[C] * doubt[A]
173 + m[D] * mNOT[B] * doubt[C] * mNOT[A]
174 + m[D] * doubt[B] * mNOT[C] * mNOT[A]
175 + m[D] * mNOT[B] * doubt[C] * doubt[A]
176 + m[D] * doubt[B] * mNOT[C] * doubt[A]
177 + m[D] * doubt[B] * doubt[C] * mNOT[A]
178 + m[D] * doubt[B] * doubt[C] * doubt[A]
179 + doubt[D] * mNOT[B] * mNOT[C] * mNOT[A];
180 cmD /= norm;
181
182
183 double cmAB = (doubt[A] * doubt[B] * mNOT[C] * mNOT[B]) / norm;
184 double cmAC = (doubt[A] * doubt[C] * mNOT[B] * mNOT[D]) / norm;
185 double cmAD = (doubt[A] * doubt[D] * mNOT[B] * mNOT[C]) / norm;
186 double cmBC = (doubt[B] * doubt[C] * mNOT[A] * mNOT[D]) / norm;
187 double cmBD = (doubt[B] * doubt[D] * mNOT[A] * mNOT[C]) / norm;
188 double cmCD = (doubt[C] * doubt[D] * mNOT[A] * mNOT[B]) / norm;
189
190 double cmABC = (mNOT[D] * doubt[A] * doubt[B] * doubt[C]) / norm;
191 double cmABD = (mNOT[C] * doubt[A] * doubt[B] * doubt[D]) / norm;
192 double cmACD = (mNOT[B] * doubt[A] * doubt[C] * doubt[D]) / norm;
193 double cmBCD = (mNOT[A] * doubt[B] * doubt[C] * doubt[D]) / norm;
194
195 double cmABCD = (doubt[A] * doubt[B] * doubt[C] * doubt[D]) / norm;
196
197 // Get belief and plausibilitiy:
198
199 double [] belief = new double[]{cmA, cmB, cmC, cmD};
200 double [] plaus = new double[]{cmBCD, cmACD, cmABD, cmABC};
201
202
203 // Print results:
204 int overallClass = -1;
205 double topcConf = -1;
206 System.out.println("\nClassification of protein " + prot.getName() + ":");
207 for (int i = A; i <= D; i++) {
208 double conf = belief[i] - plaus[i];
209 System.out.println(" " + typeNames[i]
210 + "?\tBelief = " + belief[i]
211 + ";\tPlausibility = " + plaus[i] + ".");
212 if (conf > topcConf) {
213 overallClass = i;
214 topcConf = conf;
215 }
216 }
217 if (overallClass < 0)
218 System.out.println(" DESISION: Cannot localise protein.");
219 else
220 System.out.println(" DESISION: (" + ((char) ('A' + overallClass)) + ") "
221 + typeNames[overallClass]
222 + " (confidence: " + topcConf
223 + " evience for: " + belief[overallClass]
224 + " evidence against: " + plaus[overallClass] + ")");
225 return overallClass;
226 }
227
228 private boolean align(Protein protein, ProteinLocationClassifier classif) {
229
230 final PolypeptidePattern pattern = classif.getPattern();
231 final int patLen = pattern.getLength();
232
233 int pStart = -(patLen - minOverlap);
234 if (pStart > 0)
235 pStart = 0;
236 final int pStop = protein.getLength() - minOverlap;
237
238 for (int p = pStart; p <= pStop; p++) {
239 if (pattern.matchesPerformanceHack(protein.getSequenceReferencePerformanceHack(), p))
240 return true;
241 }
242 return false;
243 }
244
245 private int classifyProtein(Protein prot) {
246 if (null == classifiers || 0 == classifiers.length) {
247 System.out.println("No classifiers available");
248 return -1;
249 }
250
251 boolean [] match = new boolean[classifiers.length];
252 for (int i = A; i <= D; i++) {
253 match[i] = align(prot, classifiers[i]);
254 }
255
256 return analyseResults(prot, match);
257 }
258
259 public void exec(String fastaFile) {
260 SimplifiedFastaFileParser parser = new SimplifiedFastaFileParser();
261 ProteinGroup group = new ProteinGroup("Unknown", parser, fastaFile);
262 for (int i = 0; i < group.size(); i++) {
263 final Protein prot = group.getProtein(i);
264 final int classification = classifyProtein(prot);
265 if (classification > -1)
266 totalClassifications[classification]++;
267 }
268 System.out.println("--------------------------------------------------------------------------------");
269 System.out.println("TOTALS:");
270 for (int i = A; i <= D; i++)
271 System.out.println("Total " + typeNames[i] + ": " + totalClassifications[i]);
272 System.out.println("TOTAL PROTEINS: " + group.size());
273
274 }
275
276 public static void main(String[] unusedArgs) {
277 Locator locator = new Locator();
278 locator.exec("D:/Courseworks/4C58/cw/data/Unk.fasta");
279 }
280
281 }