001 package org.jaga.individualRepresentation.proteinLocation;
002
003 import java.io.*;
004 import java.util.ArrayList;
005
006 /**
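007 * Parser for a simplified FASTA format: reads records consisting of a '&gt;' header line followed by sequence lines and adds them as proteins to a protein group.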
008 *
009 * <p><u>Project:</u> JAGA - Java API for Genetic Algorithms.</p>
010 *
011 * <p><u>Company:</u> University College London and JAGA.Org
012 * (<a href="http://www.jaga.org" target="_blank">http://www.jaga.org</a>).
013 * </p>
014 *
015 * <p><u>Copyright:</u> (c) 2004 by G. Paperin.<br/>
016 * This program is free software; you can redistribute it and/or modify
017 * it under the terms of the GNU General Public License as published by
018 * the Free Software Foundation, ONLY if you include a note of the original
019 * author(s) in any redistributed/modified copy.<br/>
020 * This program is distributed in the hope that it will be useful,
021 * but WITHOUT ANY WARRANTY; without even the implied warranty of
022 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
023 * GNU General Public License for more details.<br/>
024 * You should have received a copy of the GNU General Public License
025 * along with this program; if not, write to the Free Software
026 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
027 * or see http://www.gnu.org/licenses/gpl.html</p>
028 *
029 * @author Greg Paperin (greg@jaga.org)
030 *
031 * @version JAGA public release 1.0 beta
032 */
033
034 public class SimplifiedFastaFileParser implements ProteinFileParser {
035
036 public SimplifiedFastaFileParser() {}
037
038 public void readFromFile(String fileName, ProteinGroup callbackAccumulator)
039 throws java.io.IOException {
040
041 BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(fileName)));
042 try {
043
044 String s = in.readLine();
045
046 while (null != s) {
047 while (null != s && (0 == s.trim().length() || '>' != s.trim().charAt(0)))
048 s = in.readLine();
049
050 if (null == s)
051 continue;
052
053 String name = "<untitled>";
054 String sequence = "";
055 s = s.trim();
056 if (s.length() > 1)
057 name = s.substring(1);
058
059 s = in.readLine();
060 while(s != null && s.trim().length() > 0 && '>' != s.trim().charAt(0)) {
061 //System.out.println(s);
062 sequence += s.trim().toUpperCase();
063 s = in.readLine();
064 }
065
066 ArrayList seqs = new ArrayList();
067 seqs.add(sequence);
068 int i = 0;
069 while (i < seqs.size()) {
070 String seq = (String) seqs.get(i);
071 if (seq.indexOf('B') >= 0) {
072 seqs.remove(i);
073 copyReplace(sequence, 'B', 'D');
074 copyReplace(sequence, 'B', 'N');
075 i = 0;
076 } else if (seq.indexOf('Z') >= 0) {
077 seqs.remove(i);
078 copyReplace(sequence, 'Z', 'E');
079 copyReplace(sequence, 'Z', 'Q');
080 i = 0;
081 } else
082 ++i;
083 }
084
085 if (1 == seqs.size()) {
086 Protein prot = new Protein(name, (String) seqs.get(0));
087 callbackAccumulator.add(prot);
088 } else {
089 for (i = 0; i < seqs.size(); i++) {
090 Protein prot = new Protein(name + "(" + i + ")", (String) seqs.get(i));
091 callbackAccumulator.add(prot);
092 }
093 }
094
095 }
096
097 } finally {
098 in.close();
099 }
100 }
101
102 private String copyReplace(String str, char oldChar, char newChar) {
103 StringBuffer b = new StringBuffer(str);
104 for (int i = 0; i < b.length(); i++) {
105 if (b.charAt(i) == oldChar)
106 b.setCharAt(i, newChar);
107 }
108 return b.toString();
109 }
110 }