FASTA format: Difference between revisions

From Rosetta Code
Content added Content deleted
(Added uBasic/4tH version)
(→‎version 2: rewritten)
 
(15 intermediate revisions by 11 users not shown)
Line 27: Line 27:
{{trans|Python}}
{{trans|Python}}


<lang 11l>V FASTA =
<syntaxhighlight lang="11l">V FASTA =
|‘>Rosetta_Example_1
|‘>Rosetta_Example_1
THERECANBENOSPACE
THERECANBENOSPACE
Line 51: Line 51:
R r
R r


print(fasta_parse(FASTA).map((key, val) -> ‘#.: #.’.format(key, val)).join("\n"))</lang>
print(fasta_parse(FASTA).map((key, val) -> ‘#.: #.’.format(key, val)).join("\n"))</syntaxhighlight>


{{out}}
{{out}}
Line 61: Line 61:
=={{header|Action!}}==
=={{header|Action!}}==
In the following solution the input file [https://gitlab.com/amarok8bit/action-rosetta-code/-/blob/master/source/fasta.txt fasta.txt] is loaded from H6 drive. Altirra emulator automatically converts CR/LF character from ASCII into 155 character in ATASCII charset used by Atari 8-bit computer when one from H6-H10 hard drive under DOS 2.5 is used.
In the following solution the input file [https://gitlab.com/amarok8bit/action-rosetta-code/-/blob/master/source/fasta.txt fasta.txt] is loaded from H6 drive. Altirra emulator automatically converts CR/LF character from ASCII into 155 character in ATASCII charset used by Atari 8-bit computer when one from H6-H10 hard drive under DOS 2.5 is used.
<lang Action!>PROC ReadFastaFile(CHAR ARRAY fname)
<syntaxhighlight lang="action!">PROC ReadFastaFile(CHAR ARRAY fname)
CHAR ARRAY line(256)
CHAR ARRAY line(256)
CHAR ARRAY tmp(256)
CHAR ARRAY tmp(256)
Line 90: Line 90:


ReadFastaFile(fname)
ReadFastaFile(fname)
RETURN</lang>
RETURN</syntaxhighlight>
{{out}}
{{out}}
[https://gitlab.com/amarok8bit/action-rosetta-code/-/raw/master/images/FASTA_format.png Screenshot from Atari 8-bit computer]
[https://gitlab.com/amarok8bit/action-rosetta-code/-/raw/master/images/FASTA_format.png Screenshot from Atari 8-bit computer]
Line 102: Line 102:
The simple solution just reads the file (from standard input) line by line and directly writes it to the standard output.
The simple solution just reads the file (from standard input) line by line and directly writes it to the standard output.


<lang Ada>with Ada.Text_IO; use Ada.Text_IO;
<syntaxhighlight lang="ada">with Ada.Text_IO; use Ada.Text_IO;


procedure Simple_FASTA is
procedure Simple_FASTA is
Line 129: Line 129:
end loop;
end loop;


end Simple_FASTA;</lang>
end Simple_FASTA;</syntaxhighlight>


{{out}}
{{out}}
Line 142: Line 142:




<lang Ada>with Ada.Text_IO, Ada.Containers.Indefinite_Ordered_Maps; use Ada.Text_IO;
<syntaxhighlight lang="ada">with Ada.Text_IO, Ada.Containers.Indefinite_Ordered_Maps; use Ada.Text_IO;


procedure FASTA is
procedure FASTA is
Line 187: Line 187:
Map.Iterate(Process => Print_Pair'Access); -- print Map
Map.Iterate(Process => Print_Pair'Access); -- print Map
end FASTA;</lang>
end FASTA;</syntaxhighlight>


=={{header|Aime}}==
=={{header|Aime}}==


<lang aime>file f;
<syntaxhighlight lang="aime">file f;
text n, s;
text n, s;


Line 205: Line 205:
}
}


o_(n);</lang>
o_(n);</syntaxhighlight>
{{Out}}
{{Out}}
<pre>>Rosetta_Example_1: THERECANBENOSPACE
<pre>>Rosetta_Example_1: THERECANBENOSPACE
>Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED</pre>
>Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED</pre>

=={{header|ALGOL 68}}==
{{Trans|ALGOL W}}
<syntaxhighlight lang="algol68">
BEGIN # FASTA reader: copies standard input to standard output, turning each   #
      # ">name" line into "name: " at the start of a fresh output line and     #
      # writing all other non-empty lines without a line break                 #

    # set by the event routine below once standard input is exhausted          #
    BOOL input exhausted := FALSE;
    on logical file end( stand in, ( REF FILE f )BOOL: input exhausted := TRUE );

    WHILE STRING text;
          read( ( text, newline ) );
          NOT input exhausted
    DO
        IF text /= "" THEN # blank lines produce no output #
            IF text[ LWB text ] = ">" THEN
                # heading: new output line, the name without ">", then ": "    #
                print( ( newline ) );
                print( ( text[ LWB text + 1 : ] ) );
                print( ( ": " ) )
            ELSE
                # sequence data: appended to the current output line           #
                print( ( text[ LWB text : ] ) )
            FI
        FI
    OD
END
</syntaxhighlight>
{{out}}
<pre>
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED
</pre>


=={{header|ALGOL W}}==
=={{header|ALGOL W}}==
<lang algolw>begin
<syntaxhighlight lang="algolw">begin
% reads FASTA format data from standard input and write the results to standard output %
% reads FASTA format data from standard input and write the results to standard output %
% only handles the ">" line start %
% only handles the ">" line start %
Line 236: Line 268:
readcard( line );
readcard( line );
end while_not_eof
end while_not_eof
end.</lang>
end.</syntaxhighlight>
{{out}}
{{out}}
<pre>
<pre>
Line 245: Line 277:
=={{header|Arturo}}==
=={{header|Arturo}}==


<lang rebol>parseFasta: function [data][
<syntaxhighlight lang="rebol">parseFasta: function [data][
result: #[]
result: #[]
current: ø
current: ø
Line 268: Line 300:
}
}


inspect.muted parseFasta text</lang>
inspect.muted parseFasta text</syntaxhighlight>


{{out}}
{{out}}
Line 278: Line 310:


=={{header|AutoHotkey}}==
=={{header|AutoHotkey}}==
<lang AutoHotkey>Data =
<syntaxhighlight lang="autohotkey">Data =
(
(
>Rosetta_Example_1
>Rosetta_Example_1
Line 291: Line 323:
Gui, add, Edit, w700, % Data
Gui, add, Edit, w700, % Data
Gui, show
Gui, show
return</lang>
return</syntaxhighlight>
{{out}}
{{out}}
<pre>>Rosetta_Example_1: THERECANBENOSPACE
<pre>>Rosetta_Example_1: THERECANBENOSPACE
Line 297: Line 329:


=={{header|AWK}}==
=={{header|AWK}}==
<syntaxhighlight lang="awk">
<lang AWK>
# syntax: GAWK -f FASTA_FORMAT.AWK filename
# syntax: GAWK -f FASTA_FORMAT.AWK filename
# stop processing each file when an error is encountered
# stop processing each file when an error is encountered
Line 349: Line 381:
return
return
}
}
</syntaxhighlight>
</lang>
{{out}}
{{out}}
<pre>
<pre>
Line 360: Line 392:
{{works with|QBasic|1.1}}
{{works with|QBasic|1.1}}
{{works with|QuickBasic|4.5}}
{{works with|QuickBasic|4.5}}
<lang qbasic>FUNCTION checkNoSpaces (s$)
<syntaxhighlight lang="qbasic">FUNCTION checkNoSpaces (s$)
FOR i = 1 TO LEN(s$) - 1
FOR i = 1 TO LEN(s$) - 1
IF MID$(s$, i, 1) = CHR$(32) OR MID$(s$, i, 1) = CHR$(9) THEN checkNoSpaces = 0
IF MID$(s$, i, 1) = CHR$(32) OR MID$(s$, i, 1) = CHR$(9) THEN checkNoSpaces = 0
Line 389: Line 421:
END IF
END IF
LOOP
LOOP
CLOSE #1</lang>
CLOSE #1</syntaxhighlight>


==={{header|True BASIC}}===
==={{header|True BASIC}}===
{{trans|QBasic}}
{{trans|QBasic}}
<lang qbasic>DEF EOF(f)
<syntaxhighlight lang="qbasic">DEF EOF(f)
IF END #f THEN LET EOF = -1 ELSE LET EOF = 0
IF END #f THEN LET EOF = -1 ELSE LET EOF = 0
END DEF
END DEF
Line 426: Line 458:
LOOP
LOOP
CLOSE #1
CLOSE #1
END</lang>
END</syntaxhighlight>


=={{header|BASIC256}}==
=={{header|BASIC256}}==
<lang BASIC256>open 1, "input.fasta"
<syntaxhighlight lang="basic256">open 1, "input.fasta"


first = True
first = True
Line 461: Line 493:
next i
next i
return True
return True
end function</lang>
end function</syntaxhighlight>


=={{header|C}}==
=={{header|C}}==
<lang c>#include <stdio.h>
<syntaxhighlight lang="c">#include <stdio.h>
#include <stdlib.h>
#include <stdlib.h>
#include <string.h>
#include <string.h>
Line 501: Line 533:
free(line);
free(line);
exit(EXIT_SUCCESS);
exit(EXIT_SUCCESS);
}</lang>
}</syntaxhighlight>
{{out}}
{{out}}
<pre>Rosetta_Example_1: THERECANBENOSPACE
<pre>Rosetta_Example_1: THERECANBENOSPACE
Line 508: Line 540:
=={{header|C sharp|C#}}==
=={{header|C sharp|C#}}==


<lang csharp>using System;
<syntaxhighlight lang="csharp">using System;
using System.Collections.Generic;
using System.Collections.Generic;
using System.IO;
using System.IO;
Line 559: Line 591:
Console.ReadLine();
Console.ReadLine();
}
}
}</lang>
}</syntaxhighlight>


=={{header|C++}}==
=={{header|C++}}==
<lang cpp>#include <iostream>
<syntaxhighlight lang="cpp">#include <iostream>
#include <fstream>
#include <fstream>


Line 602: Line 634:
return 0;
return 0;
}</lang>
}</syntaxhighlight>


{{out}}
{{out}}
Line 610: Line 642:


=={{header|Clojure}}==
=={{header|Clojure}}==
<lang clojure>(defn fasta [pathname]
<syntaxhighlight lang="clojure">(defn fasta [pathname]
(with-open [r (clojure.java.io/reader pathname)]
(with-open [r (clojure.java.io/reader pathname)]
(doseq [line (line-seq r)]
(doseq [line (line-seq r)]
(if (= (first line) \>)
(if (= (first line) \>)
(print (format "%n%s: " (subs line 1)))
(print (format "%n%s: " (subs line 1)))
(print line)))))</lang>
(print line)))))</syntaxhighlight>


=={{header|Common Lisp}}==
=={{header|Common Lisp}}==
<lang lisp>;; * The input file as a parameter
<syntaxhighlight lang="lisp">;; * The input file as a parameter
(defparameter *input* #p"fasta.txt"
(defparameter *input* #p"fasta.txt"
"The input file name.")
"The input file name.")
Line 631: Line 663:
:do (format t "~&~a: " (subseq line 1))
:do (format t "~&~a: " (subseq line 1))
:else
:else
:do (format t "~a" line)))</lang>
:do (format t "~a" line)))</syntaxhighlight>
{{out}}
{{out}}
<pre>Rosetta_Example_1: THERECANBENOSPACE
<pre>Rosetta_Example_1: THERECANBENOSPACE
Line 637: Line 669:
=={{header|Crystal}}==
=={{header|Crystal}}==
If you want to run below code online, then paste below code to [https://play.crystal-lang.org/#/cr <b>playground</b>]
If you want to run below code online, then paste below code to [https://play.crystal-lang.org/#/cr <b>playground</b>]
<lang ruby>
<syntaxhighlight lang="ruby">
# create tmp fasta file in /tmp/
# create tmp fasta file in /tmp/
tmpfile = "/tmp/tmp"+Random.rand.to_s+".fasta"
tmpfile = "/tmp/tmp"+Random.rand.to_s+".fasta"
Line 664: Line 696:
# show fasta component
# show fasta component
fasta.each { |k,v| puts "#{k}: #{v}"}
fasta.each { |k,v| puts "#{k}: #{v}"}
</syntaxhighlight>
</lang>
{{out}}
{{out}}
<pre>
<pre>
Line 672: Line 704:
=={{header|Delphi}}==
=={{header|Delphi}}==
See [https://rosettacode.org/wiki/FASTA_format#Pascal Pascal].
See [https://rosettacode.org/wiki/FASTA_format#Pascal Pascal].

=={{header|EasyLang}}==
<syntaxhighlight>
# Reads lines from the input_data section until an empty line is met.
# A line starting with ">" is printed on a line of its own; every other
# line is written without a line break, so sequence lines run together.
repeat
   s$ = input
   until s$ = ""
   if substr s$ 1 1 <> ">"
      write s$
   else
      # finish the previous sequence line before any header but the first
      if started = 1
         print ""
      .
      started = 1
      print s$
   .
.
input_data
>Rosetta_Example_1
THERECANBENOSPACE
>Rosetta_Example_2
THERECANBESEVERAL
LINESBUTTHEYALLMUST
BECONCATENATED

</syntaxhighlight>

=={{header|F_Sharp|F#}}==
<syntaxhighlight lang="fsharp">
//FASTA format. Nigel Galloway: March 23rd., 2023.
// fN writes one input line: a header (leading '>') starts a fresh output line and is
// printed without the '>' and with ':' appended; any other line is written with no
// line break so consecutive sequence lines run together. The length test keeps a
// blank line from raising on g[0].
let fN(g:string)=if g.Length>0 && g[0]='>' then printfn "\n%s:" (g[1..]) else printf "%s" g
// Lazily yields the lines of testFASTA.txt; the file is opened each time the sequence is enumerated.
let lines=seq{use n=System.IO.File.OpenText("testFASTA.txt") in while not n.EndOfStream do yield n.ReadLine()}
// Single enumeration, so the file is opened and read only once. The very first header
// is printed without the leading line break that fN puts in front of later headers.
let mutable first=true
for g in lines do
  if first && g.Length>0 && g[0]='>' then printfn "%s:" (g[1..]) else fN g
  first<-false
printfn ""
</syntaxhighlight>
{{out}}
<pre>
Rosetta_Example_1:
THERECANBENOSPACE
Rosetta_Example_2:
THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED
</pre>


=={{header|Factor}}==
=={{header|Factor}}==
<lang factor>USING: formatting io kernel sequences ;
<syntaxhighlight lang="factor">USING: formatting io kernel sequences ;
IN: rosetta-code.fasta
IN: rosetta-code.fasta


Line 683: Line 755:
readln rest "%s: " printf [ process-fasta-line ] each-line ;
readln rest "%s: " printf [ process-fasta-line ] each-line ;


MAIN: main</lang>
MAIN: main</syntaxhighlight>
{{out}}
{{out}}
<pre>
<pre>
Line 692: Line 764:
=={{header|Forth}}==
=={{header|Forth}}==
Developed with gforth 0.7.9
Developed with gforth 0.7.9
<lang forth>1024 constant max-Line
<syntaxhighlight lang="forth">1024 constant max-Line
char > constant marker
char > constant marker


Line 708: Line 780:
cr ;
cr ;
Test
Test
</syntaxhighlight>
</lang>
{{out}}
{{out}}
<pre>
<pre>
Line 718: Line 790:
This program sticks to the task as described in the heading and doesn't allow for any of the (apparently) obsolete
This program sticks to the task as described in the heading and doesn't allow for any of the (apparently) obsolete
practices described in the Wikipedia article :
practices described in the Wikipedia article :
<lang freebasic>' FB 1.05.0 Win64
<syntaxhighlight lang="freebasic">' FB 1.05.0 Win64


Function checkNoSpaces(s As String) As Boolean
Function checkNoSpaces(s As String) As Boolean
Line 755: Line 827:
Print : Print
Print : Print
Print "Press any key to quit"
Print "Press any key to quit"
Sleep</lang>
Sleep</syntaxhighlight>


{{out}}
{{out}}
Line 764: Line 836:


=={{header|Gambas}}==
=={{header|Gambas}}==
<lang gambas>Public Sub Main()
<syntaxhighlight lang="gambas">Public Sub Main()
Dim sList As String = File.Load("../FASTA")
Dim sList As String = File.Load("../FASTA")
Dim sTemp, sOutput As String
Dim sTemp, sOutput As String
Line 779: Line 851:
Print sOutput
Print sOutput


End</lang>
End</syntaxhighlight>
Output:
Output:
<pre>Rosetta_Example_1: THERECANBENOSPACE
<pre>Rosetta_Example_1: THERECANBENOSPACE
Line 786: Line 858:


=={{header|Go}}==
=={{header|Go}}==
<lang go>package main
<syntaxhighlight lang="go">package main


import (
import (
Line 828: Line 900:
fmt.Println(err)
fmt.Println(err)
}
}
}</lang>
}</syntaxhighlight>
{{out}}
{{out}}
<pre>
<pre>
Line 842: Line 914:
We parse FASTA by hand (generally not a recommended approach). We use the fact that groupBy walks the list from the head and groups the items by a predicate; here we first concatenate all the fasta strings and then pair those with each respective name.
We parse FASTA by hand (generally not a recommended approach). We use the fact that groupBy walks the list from the head and groups the items by a predicate; here we first concatenate all the fasta strings and then pair those with each respective name.


<lang haskell>import Data.List ( groupBy )
<syntaxhighlight lang="haskell">import Data.List ( groupBy )


parseFasta :: FilePath -> IO ()
parseFasta :: FilePath -> IO ()
Line 858: Line 930:
pair :: [String] -> [(String, String)]
pair :: [String] -> [(String, String)]
pair [] = []
pair [] = []
pair (x : y : xs) = (drop 1 x, y) : pair xs</lang>
pair (x : y : xs) = (drop 1 x, y) : pair xs</syntaxhighlight>


{{out}}
{{out}}
Line 868: Line 940:
We parse FASTA using parser combinators. Normally you'd use something like Trifecta or Parsec, but here we use ReadP, because it is simple and also included in ghc by default. With other parsing libraries the code would be almost the same.
We parse FASTA using parser combinators. Normally you'd use something like Trifecta or Parsec, but here we use ReadP, because it is simple and also included in ghc by default. With other parsing libraries the code would be almost the same.


<lang haskell>import Text.ParserCombinators.ReadP
<syntaxhighlight lang="haskell">import Text.ParserCombinators.ReadP
import Control.Applicative ( (<|>) )
import Control.Applicative ( (<|>) )
import Data.Char ( isAlpha, isAlphaNum )
import Data.Char ( isAlpha, isAlphaNum )
Line 885: Line 957:
name = char '>' *> many (satisfy isAlphaNum <|> char '_') <* newline
name = char '>' *> many (satisfy isAlphaNum <|> char '_') <* newline
code = concat <$> many (many (satisfy isAlpha) <* newline)
code = concat <$> many (many (satisfy isAlpha) <* newline)
newline = char '\n'</lang>
newline = char '\n'</syntaxhighlight>


{{out}}
{{out}}
Line 893: Line 965:
=={{header|J}}==
=={{header|J}}==
Needs chunking to handle huge files.
Needs chunking to handle huge files.
<lang j>require 'strings' NB. not needed for J versions greater than 6.
<syntaxhighlight lang="j">require 'strings' NB. not needed for J versions greater than 6.
parseFasta=: ((': ' ,~ LF&taketo) , (LF -.~ LF&takeafter));._1</lang>
parseFasta=: ((': ' ,~ LF&taketo) , (LF -.~ LF&takeafter));._1</syntaxhighlight>
'''Example Usage'''
'''Example Usage'''
<lang j> Fafile=: noun define
<syntaxhighlight lang="j"> Fafile=: noun define
>Rosetta_Example_1
>Rosetta_Example_1
THERECANBENOSPACE
THERECANBENOSPACE
Line 906: Line 978:
parseFasta Fafile
parseFasta Fafile
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED</lang>
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED</syntaxhighlight>

Nowadays, most machines have gigabytes of memory. However, if it's necessary to process FASTA content on a system with inadequate memory, we can use files to hold intermediate results. For example:

<syntaxhighlight lang="j">bs=: 2  NB. number of bytes requested from the file by each fread below
NB. chunkFasta: x names the directory that receives the extracted files, y names the FASTA file.
NB. For every '>' header line two files are written: <name>.head holding the header text and
NB. <name> accumulating the sequence text, where <name> is x,'/' followed by the header text
NB. with every non-alphanumeric character removed.
NB. Result: one row per header, holding the boxed .head file name and the boxed content file name.
chunkFasta=: {{
r=. EMPTY
NB. bad: every character of a. except 0-9, A-Z and a-z
bad=. a.-.a.{~;48 65 97(+i.)each 10 26 26
dir=. x,'/'
off=. 0
siz=. fsize y
NB. NOTE(review): dest is assigned here but not referenced again in this definition
block=. dest=. ''
while. off < siz do.
NB. append the next piece of the file: it starts at off and is bs bytes long, clipped at the end of the file
block=. block,fread y;off([, [ -~ siz<.+)bs
off=. off+bs
NB. consume each LF-terminated line currently held in block
while. LF e. block do.
line=. LF taketo block
select. {.line
case. ';' do.  NB. lines starting with ';' are skipped
case. '>' do.  NB. header line: start new output files
start=. }.line-.CR  NB. header text: CR removed, then the leading '>' dropped
r=.r,(head=. name,'.head');<name=. dir,start -. bad
start fwrite head
'' fwrite name  NB. content file is written empty; later lines are appended to it
case. do.  NB. any other line: sequence text for the most recent header
(line-.bad) fappend name
end.
block=. LF takeafter block
end.
end.
NB. NOTE(review): text left in block after the last LF is not processed by the loops above,
NB. so a final line without a terminating LF looks like it is dropped - confirm.
r
}}</syntaxhighlight>

Here, we're using a block size of 2 bytes, to illustrate correctness. If speed matters, we should use something significantly larger.

The left argument to <code>chunkFasta</code> names the directory used to hold content extracted from the FASTA file. The right argument names that FASTA file. The result identifies the extracted headers and contents.

Thus, if '~/fasta.txt' contains the example file for this task and we want to store intermediate results in the '~temp' directory, we could use:

<syntaxhighlight lang="j"> fasta=: '~temp' chunkFasta '~/fasta.txt'</syntaxhighlight>

And, to complete the task:

<syntaxhighlight lang="j"> ;(,': ',,&LF)each/"1 fread each fasta
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED</syntaxhighlight>


=={{header|Java}}==
=={{header|Java}}==
This implementation presumes the data file is well-formed.
<syntaxhighlight lang="java">
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
</syntaxhighlight>
<syntaxhighlight lang="java">
/* Entry point: reads "fastas.txt" and prints every parsed record on its own line. */
public static void main(String[] args) throws IOException {
    readFile("fastas.txt").forEach(System.out::println);
}

/**
 * Reads the FASTA file at {@code path} and returns one {@code FASTA} per '>' header,
 * in file order.
 *
 * Each record is collected as the header line, a line separator, and then all
 * following lines joined with nothing between them; that text is handed to
 * {@code parseFASTA}. Lines that appear before the first header are ignored, and a
 * file with no header at all (including an empty file) yields an empty list rather
 * than a NullPointerException.
 *
 * @param path path of the file to read
 * @return the parsed records, possibly empty
 * @throws IOException if the file cannot be opened or read
 */
static List<FASTA> readFile(String path) throws IOException {
    try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
        List<FASTA> list = new ArrayList<>();
        /* text of the record being collected; null until the first header is seen */
        StringBuilder lines = null;
        String newline = System.lineSeparator();
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.startsWith(">")) {
                /* a new header closes the previous record, if there was one */
                if (lines != null)
                    list.add(parseFASTA(lines.toString()));
                lines = new StringBuilder();
                lines.append(line).append(newline);
            } else if (lines != null) {
                /* sequence lines are concatenated without separators */
                lines.append(line);
            }
        }
        /* flush the last record; nothing to flush when no header was found */
        if (lines != null)
            list.add(parseFASTA(lines.toString()));
        return list;
    }
}

/*
 * Splits the text of one record at the first line separator: the part before it,
 * minus its first character, is the description; the part after it is the sequence.
 */
static FASTA parseFASTA(String string) {
    int headerEnd = string.indexOf(System.lineSeparator());
    String header = string.substring(1, headerEnd);
    /* 'stripLeading' removes whatever is left of a multi-character line separator */
    String body = string.substring(headerEnd + 1).stripLeading();
    return new FASTA(header, body.toCharArray());
}

/* one FASTA entry; the sequence is kept as a 'char' array rather than a String */
record FASTA(String description, char[] sequence) {
    /* renders the entry as "description: sequence" */
    @Override
    public String toString() {
        return description + ": " + String.valueOf(sequence);
    }
}
</syntaxhighlight>
<pre>
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED
</pre>

<br />
An alternate demonstration
{{trans|D}}
{{trans|D}}
{{works with|Java|7}}
{{works with|Java|7}}
<lang java>import java.io.*;
<syntaxhighlight lang="java">import java.io.*;
import java.util.Scanner;
import java.util.Scanner;


Line 936: Line 1,114:
System.out.println();
System.out.println();
}
}
}</lang>
}</syntaxhighlight>


<pre>Rosetta_Example_1: THERECANBENOSPACE
<pre>Rosetta_Example_1: THERECANBENOSPACE
Line 944: Line 1,122:
=={{header|JavaScript}}==
=={{header|JavaScript}}==
The code below uses Nodejs to read the file.
The code below uses Nodejs to read the file.
<syntaxhighlight lang="javascript">
<lang JavaScript>
const fs = require("fs");
const fs = require("fs");
const readline = require("readline");
const readline = require("readline");
Line 973: Line 1,151:


readInterface.on("close", () => process.stdout.write("\n"));
readInterface.on("close", () => process.stdout.write("\n"));
</syntaxhighlight>
</lang>


<pre>Rosetta_Example_1: THERECANBENOSPACE
<pre>Rosetta_Example_1: THERECANBENOSPACE
Line 984: Line 1,162:
in each cycle, only as many lines are read as are required to compose an output line. <br>
in each cycle, only as many lines are read as are required to compose an output line. <br>
Notice that an additional ">" must be provided to "foreach" to ensure the final block of lines of the input file are properly assembled.
Notice that an additional ">" must be provided to "foreach" to ensure the final block of lines of the input file are properly assembled.
<syntaxhighlight lang="jq">
<lang jq>
def fasta:
def fasta:
foreach (inputs, ">") as $line
foreach (inputs, ">") as $line
Line 995: Line 1,173:
;
;


fasta</lang>
fasta</syntaxhighlight>
{{out}}
{{out}}
<lang sh>$ jq -n -R -r -f FASTA_format.jq < FASTA_format.fasta
<syntaxhighlight lang="sh">$ jq -n -R -r -f FASTA_format.jq < FASTA_format.fasta
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED</lang>
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED</syntaxhighlight>


=={{header|Julia}}==
=={{header|Julia}}==
{{works with|Julia|0.6}}
{{works with|Julia|0.6}}


<lang julia>for line in eachline("data/fasta.txt")
<syntaxhighlight lang="julia">for line in eachline("data/fasta.txt")
if startswith(line, '>')
if startswith(line, '>')
print(STDOUT, "\n$(line[2:end]): ")
print(STDOUT, "\n$(line[2:end]): ")
Line 1,010: Line 1,188:
print(STDOUT, "$line")
print(STDOUT, "$line")
end
end
end</lang>
end</syntaxhighlight>


=={{header|Kotlin}}==
=={{header|Kotlin}}==
{{trans|FreeBASIC}}
{{trans|FreeBASIC}}
<lang scala>// version 1.1.2
<syntaxhighlight lang="scala">// version 1.1.2


import java.util.Scanner
import java.util.Scanner
Line 1,043: Line 1,221:
}
}
sc.close()
sc.close()
}</lang>
}</syntaxhighlight>


{{out}}
{{out}}
Line 1,052: Line 1,230:


=={{header|Lua}}==
=={{header|Lua}}==
<lang lua>local file = io.open("input.txt","r")
<syntaxhighlight lang="lua">local file = io.open("input.txt","r")
local data = file:read("*a")
local data = file:read("*a")
file:close()
file:close()
Line 1,075: Line 1,253:
for k,v in pairs(output) do
for k,v in pairs(output) do
print(k..": "..v)
print(k..": "..v)
end</lang>
end</syntaxhighlight>


{{out}}
{{out}}
Line 1,089: Line 1,267:




<syntaxhighlight lang="m2000 interpreter">
<lang M2000 Interpreter>
Module CheckIt {
Module CheckIt {
Class FASTA_MACHINE {
Class FASTA_MACHINE {
Line 1,193: Line 1,371:
}
}
checkit
checkit
</syntaxhighlight>
</lang>


=={{header|Mathematica}}/{{header|Wolfram Language}}==
=={{header|Mathematica}}/{{header|Wolfram Language}}==
Mathematica has built-in support for FASTA files and strings
Mathematica has built-in support for FASTA files and strings
<lang Mathematica>ImportString[">Rosetta_Example_1
<syntaxhighlight lang="mathematica">ImportString[">Rosetta_Example_1
THERECANBENOSPACE
THERECANBENOSPACE
>Rosetta_Example_2
>Rosetta_Example_2
Line 1,203: Line 1,381:
LINESBUTTHEYALLMUST
LINESBUTTHEYALLMUST
BECONCATENATED
BECONCATENATED
", "FASTA"]</lang>
", "FASTA"]</syntaxhighlight>
{{out}}
{{out}}
<pre>{"THERECANBENOSPACE", "THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED"}</pre>
<pre>{"THERECANBENOSPACE", "THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED"}</pre>


=={{header|Nim}}==
=={{header|Nim}}==
<syntaxhighlight lang="nim">
<lang Nim>
import strutils
import strutils


Line 1,229: Line 1,407:


fasta(input)
fasta(input)
</syntaxhighlight>
</lang>
{{out}}
<pre>
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED
</pre>
=={{header|Oberon}}==
Works with A2 Oberon.

<syntaxhighlight lang="Oberon">
MODULE Fasta;
(* Writes the content of a FASTA file as "name: sequence" lines. *)

IMPORT Files, Streams, Strings, Commands;

(* Reads the file called filename string by string and writes it to wr.
   A string starting with '>' begins a new output line and is written without
   the '>' and followed by ": "; any other string is written as it is.
   NOTE(review): presumably each GetString call delivers one input line here -
   confirm against the Streams.Reader documentation. *)
PROCEDURE PrintOn*(filename: ARRAY OF CHAR; wr: Streams.Writer);
VAR
    file: Files.File;
    reader: Files.Reader;
    text: ARRAY 1024 OF CHAR;
    ok: BOOLEAN;
BEGIN
    file := Files.Old(filename);
    ASSERT(file # NIL);
    NEW(reader, file, 0);
    ok := reader.GetString(text);
    WHILE reader.res # Streams.EOF DO
        IF text[0] # '>' THEN
            wr.String(text)
        ELSE
            wr.Ln;
            wr.String(Strings.Substring2(1, text)^);
            wr.String(": ")
        END;
        ok := reader.GetString(text)
    END
END PrintOn;

(* Command entry: takes the file name from the command context's arguments
   and prints that file on the context's output. *)
PROCEDURE Do*;
VAR
    context: Commands.Context;
    filename: ARRAY 256 OF CHAR;
    ok: BOOLEAN;
BEGIN
    context := Commands.GetContext();
    ok := context.arg.GetString(filename);
    PrintOn(filename, context.out)
END Do;

END Fasta.
</syntaxhighlight>
{{out}}
{{out}}
<pre>
<pre>
Line 1,237: Line 1,464:


=={{header|Objeck}}==
=={{header|Objeck}}==
<lang objeck>class Fasta {
<syntaxhighlight lang="objeck">class Fasta {
function : Main(args : String[]) ~ Nil {
function : Main(args : String[]) ~ Nil {
if(args->Size() = 1) {
if(args->Size() = 1) {
Line 1,262: Line 1,489:
}
}
}
}
</syntaxhighlight>
</lang>


{{out}}
{{out}}
Line 1,275: Line 1,502:
The program reads and processes the input one line at a time, and directly prints out the chunk of data available. The long strings are not concatenated in memory but just examined and processed as necessary: either printed out as is in the case of part of a sequence, or formatted in the case of the name (what I call the label), and managing the new lines where needed.
The program reads and processes the input one line at a time, and directly prints out the chunk of data available. The long strings are not concatenated in memory but just examined and processed as necessary: either printed out as is in the case of part of a sequence, or formatted in the case of the name (what I call the label), and managing the new lines where needed.
{{works with|OCaml|4.03+}}
{{works with|OCaml|4.03+}}
<lang ocaml>
<syntaxhighlight lang="ocaml">
(* This program reads from the standard input and writes to standard output.
(* This program reads from the standard input and writes to standard output.
* Examples of use:
* Examples of use:
Line 1,316: Line 1,543:
let () =
let () =
print_fasta stdin
print_fasta stdin
</syntaxhighlight>
</lang>
{{out}}
{{out}}
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_1: THERECANBENOSPACE
Line 1,322: Line 1,549:


=={{header|Pascal}}==
=={{header|Pascal}}==
<syntaxhighlight lang="pascal">
<lang Pascal>
program FASTA_Format;
program FASTA_Format;
// FPC 3.0.2
// FPC 3.0.2
Line 1,362: Line 1,589:
Close(InF);
Close(InF);
end.
end.
</syntaxhighlight>
</lang>


FASTA_Format < test.fst
FASTA_Format < test.fst
Line 1,371: Line 1,598:


=={{header|Perl}}==
=={{header|Perl}}==
<lang perl>my $fasta_example = <<'END_FASTA_EXAMPLE';
<syntaxhighlight lang="perl">my $fasta_example = <<'END_FASTA_EXAMPLE';
>Rosetta_Example_1
>Rosetta_Example_1
THERECANBENOSPACE
THERECANBENOSPACE
Line 1,389: Line 1,616:
print;
print;
}
}
}</lang>
}</syntaxhighlight>
{{out}}
{{out}}
<pre>
<pre>
Line 1,397: Line 1,624:


=={{header|Phix}}==
=={{header|Phix}}==
<!--<lang Phix>(notonline)-->
<!--<syntaxhighlight lang="phix">(notonline)-->
<span style="color: #004080;">bool</span> <span style="color: #000000;">first</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span>
<span style="color: #004080;">bool</span> <span style="color: #000000;">first</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">fn</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">open</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"fasta.txt"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"r"</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">fn</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">open</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"fasta.txt"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"r"</span><span style="color: #0000FF;">)</span>
Line 1,421: Line 1,648:
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #7060A8;">close</span><span style="color: #0000FF;">(</span><span style="color: #000000;">fn</span><span style="color: #0000FF;">)</span>
<span style="color: #7060A8;">close</span><span style="color: #0000FF;">(</span><span style="color: #000000;">fn</span><span style="color: #0000FF;">)</span>
<!--</lang>-->
<!--</syntaxhighlight>-->
{{out}}
{{out}}
<pre>
<pre>
Line 1,429: Line 1,656:


=={{header|PicoLisp}}==
=={{header|PicoLisp}}==
<lang PicoLisp>(de fasta (F)
<syntaxhighlight lang="picolisp">(de fasta (F)
(in F
(in F
(while (from ">")
(while (from ">")
Line 1,436: Line 1,663:
(prin (line T)) )
(prin (line T)) )
(prinl) ) ) )
(prinl) ) ) )
(fasta "fasta.dat")</lang>
(fasta "fasta.dat")</syntaxhighlight>
{{out}}
<pre>
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED
</pre>

=={{header|PL/M}}==
{{works with|8080 PL/M Compiler}} ... under CP/M (or an emulator)
Reads the data from the file named on the command line, e.g., if the program is stored in D:FASTA.COM and the data in D:FASTAIN.TXT, the following could be used: <code>D:FASTA D:FASTAIN.TXT</code>.<br>
Restarts CP/M when the program finishes.
<syntaxhighlight lang="plm">
100H: /* DISPLAY THE CONTENTS OF A FASTA FORMAT FILE */

DECLARE FALSE LITERALLY '0', TRUE LITERALLY '0FFH';
DECLARE NL$CHAR LITERALLY '0AH'; /* NEWLINE: CHAR 10 */
DECLARE CR$CHAR LITERALLY '0DH'; /* CARRIAGE RETURN, CHAR 13 */
DECLARE EOF$CHAR LITERALLY '26'; /* EOF: CTRL-Z (DECIMAL 26 = 1AH) */
/* CP/M BDOS SYSTEM CALL, RETURNS A VALUE */
BDOS: PROCEDURE( FN, ARG )BYTE; DECLARE FN BYTE, ARG ADDRESS; GOTO 5; END;
/* CP/M BDOS SYSTEM CALL, NO RETURN VALUE */
BDOS$P: PROCEDURE( FN, ARG ); DECLARE FN BYTE, ARG ADDRESS; GOTO 5; END;
EXIT: PROCEDURE; CALL BDOS$P( 0, 0 ); END; /* CP/M SYSTEM RESET */
PR$CHAR: PROCEDURE( C ); DECLARE C BYTE; CALL BDOS$P( 2, C ); END;
PR$STRING: PROCEDURE( S ); DECLARE S ADDRESS; CALL BDOS$P( 9, S ); END;
PR$NL: PROCEDURE; CALL PR$STRING( .( 0DH, NL$CHAR, '$' ) ); END;
FL$EXISTS: PROCEDURE( FCB )BYTE; /* RETURNS TRUE IF THE FILE NAMED IN THE */
DECLARE FCB ADDRESS; /* FCB EXISTS */
RETURN ( BDOS( 17, FCB ) < 4 );
END FL$EXISTS ;
FL$OPEN: PROCEDURE( FCB )BYTE; /* OPEN THE FILE WITH THE SPECIFIED FCB */
DECLARE FCB ADDRESS;
RETURN ( BDOS( 15, FCB ) < 4 );
END FL$OPEN;
FL$READ: PROCEDURE( FCB )BYTE; /* READ THE NEXT RECORD FROM FCB */
DECLARE FCB ADDRESS;
RETURN ( BDOS( 20, FCB ) = 0 );
END FL$READ;
FL$CLOSE: PROCEDURE( FCB )BYTE; /* CLOSE THE FILE WITH THE SPECIFIED FCB */
DECLARE FCB ADDRESS;
RETURN ( BDOS( 16, FCB ) < 4 );
END FL$CLOSE;

/* I/O USES FILE CONTROL BLOCKS CONTAINING THE FILE-NAME, POSITION, ETC. */
/* WHEN THE PROGRAM IS RUN, THE CCP WILL FIRST PARSE THE COMMAND LINE AND */
/* PUT THE FIRST PARAMETER IN FCB1, THE SECOND PARAMETER IN FCB2 */
/* BUT FCB2 OVERLAYS THE END OF FCB1 AND THE DMA BUFFER OVERLAYS THE END */
/* OF FCB2 */

DECLARE FCB$SIZE LITERALLY '36'; /* SIZE OF A FCB */
DECLARE FCB1 LITERALLY '5CH'; /* ADDRESS OF FIRST FCB */
DECLARE FCB2 LITERALLY '6CH'; /* ADDRESS OF SECOND FCB */
DECLARE DMA$BUFFER LITERALLY '80H'; /* DEFAULT DMA BUFFER ADDRESS */
DECLARE DMA$SIZE LITERALLY '128'; /* SIZE OF THE DMA BUFFER */

DECLARE F$PTR ADDRESS, F$CHAR BASED F$PTR BYTE;

/* CLEAR THE PARTS OF FCB1 OVERLAYED BY FCB2 */
DO F$PTR = FCB1 + 12 TO FCB1 + ( FCB$SIZE - 1 );
F$CHAR = 0;
END;

/* SHOW THE FASTA DATA, IF THE FILE EXISTS */
/* THE FILE IS READ ONE RECORD AT A TIME INTO THE DEFAULT DMA BUFFER AND */
/* EACH CHARACTER IS PRINTED AS IT IS SEEN, SO ONLY ONE RECORD IS HELD */
IF NOT FL$EXISTS( FCB1 ) THEN DO; /* THE FILE DOES NOT EXIST */
   CALL PR$STRING( .'FILE NOT FOUND$' );CALL PR$NL;
END;
ELSE IF NOT FL$OPEN( FCB1 ) THEN DO; /* UNABLE TO OPEN THE FILE */
   CALL PR$STRING( .'UNABLE TO OPEN THE FILE$' );CALL PR$NL;
END;
ELSE DO; /* FILE EXISTS AND OPENED OK - ATTEMPT TO SHOW THE DATA */
   /* BOL: TRUE WHEN THE NEXT CHARACTER IS THE FIRST OF A LINE */
   /* GOT$RCD: TRUE WHILE THERE IS DATA LEFT TO PROCESS */
   /* IS$HEADING: TRUE WHILE THE CURRENT LINE IS A ">" HEADING LINE */
   DECLARE ( BOL, GOT$RCD, IS$HEADING ) BYTE, DMA$END ADDRESS;
   DMA$END = DMA$BUFFER + ( DMA$SIZE - 1 );
   GOT$RCD = FL$READ( FCB1 ); /* GET THE FIRST RECORD */
   F$PTR = DMA$BUFFER;
   BOL = TRUE;
   IS$HEADING = FALSE;
   DO WHILE GOT$RCD;
      IF F$PTR > DMA$END THEN DO; /* END OF BUFFER */
         GOT$RCD = FL$READ( FCB1 ); /* GET THE NEXT RECORD */
         /* RESTART AT THE FIRST CHARACTER OF THE NEW RECORD - THE */
         /* POINTER MUST NOT BE ADVANCED UNTIL IT HAS BEEN PROCESSED */
         F$PTR = DMA$BUFFER;
      END;
      ELSE DO; /* PROCESS THE CURRENT CHARACTER, THEN ADVANCE */
         IF F$CHAR = NL$CHAR THEN DO; /* END OF LINE */
            IF IS$HEADING THEN DO;
               CALL PR$STRING( .': $' );
               IS$HEADING = FALSE;
            END;
            BOL = TRUE;
         END;
         ELSE IF F$CHAR = CR$CHAR THEN DO; END; /* IGNORE CARRIAGE RETURN */
         ELSE IF F$CHAR = EOF$CHAR THEN GOT$RCD = FALSE; /* END OF FILE */
         ELSE DO; /* HAVE ANOTHER CHARACTER */
            IF NOT BOL THEN CALL PR$CHAR( F$CHAR ); /* NOT FIRST CHARACTER */
            ELSE DO; /* FIRST CHARACTER - CHECK FOR A HEADING LINE */
               BOL = FALSE;
               /* A HEADING STARTS A NEW OUTPUT LINE, THE ">" IS NOT SHOWN */
               IF IS$HEADING := F$CHAR = '>' THEN CALL PR$NL;
               ELSE CALL PR$CHAR( F$CHAR );
            END;
         END;
         F$PTR = F$PTR + 1;
      END;
   END;
   /* CLOSE THE FILE */
   IF NOT FL$CLOSE( FCB1 ) THEN DO;
      CALL PR$STRING( .'UNABLE TO CLOSE THE FILE$' ); CALL PR$NL;
   END;
END;

CALL EXIT;

EOF
</syntaxhighlight>
{{out}}
{{out}}
<pre>
<pre>
Line 1,446: Line 1,782:
When working with a real file, the content of the <code>$file</code> variable would be: <code>Get-Content -Path .\FASTA_file.txt -ReadCount 1000</code>. The <code>-ReadCount</code> parameter value for large files is unknown, yet sure to be a value between 1,000 and 10,000 depending upon the length of file and length of the records in the file. Experimentation is the only way to know the optimum value.
When working with a real file, the content of the <code>$file</code> variable would be: <code>Get-Content -Path .\FASTA_file.txt -ReadCount 1000</code>. The <code>-ReadCount</code> parameter value for large files is unknown, yet sure to be a value between 1,000 and 10,000 depending upon the length of file and length of the records in the file. Experimentation is the only way to know the optimum value.
{{works with|PowerShell|4.0+}}
{{works with|PowerShell|4.0+}}
<syntaxhighlight lang="powershell">
<lang PowerShell>
$file = @'
$file = @'
>Rosetta_Example_1
>Rosetta_Example_1
Line 1,468: Line 1,804:


$output | Format-List
$output | Format-List
</syntaxhighlight>
</lang>
{{Out}}
{{Out}}
<pre>
<pre>
Line 1,476: Line 1,812:


===Version 3.0 Or Less===
===Version 3.0 Or Less===
<syntaxhighlight lang="powershell">
<lang PowerShell>
$file = @'
$file = @'
>Rosetta_Example_1
>Rosetta_Example_1
Line 1,498: Line 1,834:


$output | Format-List
$output | Format-List
</syntaxhighlight>
</lang>
{{Out}}
{{Out}}
<pre>
<pre>
Line 1,506: Line 1,842:


=={{header|PureBasic}}==
=={{header|PureBasic}}==
<lang PureBasic>EnableExplicit
<syntaxhighlight lang="purebasic">EnableExplicit
Define Hdl_File.i,
Define Hdl_File.i,
Frm_File.i,
Frm_File.i,
Line 1,534: Line 1,870:
CloseFile(Hdl_File)
CloseFile(Hdl_File)
Input()
Input()
EndIf</lang>
EndIf</syntaxhighlight>
{{out}}
{{out}}
<pre>Rosetta_Example_1: THERECANBENOSPACE
<pre>Rosetta_Example_1: THERECANBENOSPACE
Line 1,544: Line 1,880:
and I use a generator expression yielding key, value pairs
and I use a generator expression yielding key, value pairs
as soon as they are read, keeping the minimum in memory.
as soon as they are read, keeping the minimum in memory.
<lang python>import io
<syntaxhighlight lang="python">import io


FASTA='''\
FASTA='''\
Line 1,568: Line 1,904:
yield key, val
yield key, val


print('\n'.join('%s: %s' % keyval for keyval in fasta_parse(infile)))</lang>
print('\n'.join('%s: %s' % keyval for keyval in fasta_parse(infile)))</syntaxhighlight>


{{out}}
{{out}}
Line 1,575: Line 1,911:


=={{header|R}}==
=={{header|R}}==
<lang rsplus>
<syntaxhighlight lang="rsplus">
library("seqinr")
library("seqinr")


Line 1,588: Line 1,924:
cat(attr(aline, 'Annot'), ":", aline, "\n")
cat(attr(aline, 'Annot'), ":", aline, "\n")
}
}
</syntaxhighlight>
</lang>
{{out}}
{{out}}
<pre>
<pre>
Line 1,596: Line 1,932:


=={{header|Racket}}==
=={{header|Racket}}==
<lang racket>
<syntaxhighlight lang="racket">
#lang racket
#lang racket
(let loop ([m #t])
(let loop ([m #t])
Line 1,606: Line 1,942:
(current-output-port)))))
(current-output-port)))))
(newline)
(newline)
</syntaxhighlight>
</lang>


=={{header|Raku}}==
=={{header|Raku}}==
(formerly Perl 6)
(formerly Perl 6)
<lang perl6>grammar FASTA {
<syntaxhighlight lang="raku" line>grammar FASTA {


rule TOP { <entry>+ }
rule TOP { <entry>+ }
Line 1,630: Line 1,966:
for $/<entry>[] {
for $/<entry>[] {
say ~.<title>, " : ", .<sequence>.made;
say ~.<title>, " : ", .<sequence>.made;
}</lang>
}</syntaxhighlight>
{{out}}
{{out}}
<pre>Rosetta_Example_1 : THERECANBENOSPACE
<pre>Rosetta_Example_1 : THERECANBENOSPACE
Line 1,639: Line 1,975:
===version 1===
===version 1===
This REXX version correctly processes the examples shown.
This REXX version correctly processes the examples shown.
<lang rexx>/*REXX program reads a (bio-informational) FASTA file and displays the contents. */
<syntaxhighlight lang="rexx">/*REXX program reads a (bio-informational) FASTA file and displays the contents. */
parse arg iFID . /*iFID: the input file to be read. */
Parse Arg ifid . /* iFID: the input file to be read */
If ifid=='' Then
if iFID=='' then iFID='FASTA.IN' /*Not specified? Then use the default.*/
name= /*the name of an output file (so far). */
ifid='FASTA.IN' /* Not specified? Then use the default */
$= /*the value of the output file's stuff.*/
name='' /* the name of an output file (so far) */
do while lines(iFID)\==0 /*process the FASTA file contents. */
d='' /* the value of the output file's */
x=strip( linein(iFID), 'T') /*read a line (a record) from the file,*/
Do While lines(ifid)\==0 /* process the FASTA file contents */
x=strip(linein(ifid),'T') /* read a line (a record) from the input */
/*───────── and strip trailing blanks. */
/* and strip trailing blanks */
if left(x, 1)=='>' then do
if $\=='' then say name':' $
If left(x,1)=='>' Then Do /* a new file id */
name=substr(x, 2)
Call out /* show output name and data */
$=
name=substr(x,2) /* and get the new (or first) output name */
end
d='' /* start with empty contents */
End
else $=$ || x
end /*j*/ /* [↓] show output of last file used. */
Else /* a line with data */
if $\=='' then say name':' $ /*stick a fork in it, we're all done. */</lang>
d=d||x /* append it to output */
End
Call out /* show output of last file used. */
Exit

out:
If d\=='' Then /* if there ara data */
Say name':' d /* show output name and data */
Return</syntaxhighlight>
{{out|output|text=&nbsp; when using the default input filename:}}
{{out|output|text=&nbsp; when using the default input filename:}}
<pre>
<pre>
Line 1,667: Line 2,011:
::* &nbsp; sequences that contain blanks, tabs, and other whitespace
::* &nbsp; sequences that contain blanks, tabs, and other whitespace
::* &nbsp; sequence names that are identified with a semicolon &nbsp; [''';''']
::* &nbsp; sequence names that are identified with a semicolon &nbsp; [''';''']
<lang rexx>/*REXX program reads a (bio-informational) FASTA file and displays the contents. */
<syntaxhighlight lang="rexx">/*REXX program reads a (bio-informational) FASTA file and displays the contents. */
parse arg iFID . /*iFID: the input file to be read. */
Parse Arg iFID . /*iFID: the input file to be read. */
if iFID=='' then iFID='FASTA2.IN' /*Not specified? Then use the default.*/
If iFID=='' Then iFID='FASTA2.IN' /*Not specified? Then use the default.*/
name= /*the name of an output file (so far). */
name='' /*the name of an output file (so far). */
data=''
$= /*the value of the output file's stuff.*/
do while lines(iFID)\==0 /*process the FASTA file contents. */
/*the value of the output file's stuff.*/
x=strip( linein(iFID), 'T') /*read a line (a record) from the file,*/
Do While lines(iFID)\==0 /*process the FASTA file contents. */
x=strip(linein(iFID),'T') /*read a line (a record) from the file,*/
/*───────── and strip trailing blanks. */
if x=='' then iterate /*If the line is all blank, ignore it. */
/*--------- and strip trailing blanks. */
Select
if left(x, 1)==';' then do
if name=='' then name=substr(x,2)
When x=='' Then /* If the line is all blank, */
say x
Nop /* ignore it. */
When left(x,1)==';' Then Do
iterate
If name=='' Then name=substr(x,2)
end
if left(x, 1)=='>' then do
Say x
End
if $\=='' then say name':' $
When left(x,1)=='>' Then Do
name=substr(x, 2)
If data\=='' Then
$=
end
Say name':' data
name=substr(x,2)
else $=space($ || translate(x, , '*'), 0)
data=''
end /*j*/ /* [↓] show output of last file used. */
End
if $\=='' then say name':' $ /*stick a fork in it, we're all done. */</lang>
Otherwise
data=space(data||translate(x, ,'*'),0)
End
End
If data\=='' Then
Say name':' data /* [?] show output of last file used. */
</syntaxhighlight>
<pre>
<pre>
'''input:''' &nbsp; The &nbsp; '''FASTA2.IN''' &nbsp; file is shown below:
'''input:''' &nbsp; The &nbsp; '''FASTA2.IN''' &nbsp; file is shown below:
Line 1,721: Line 2,072:


=={{header|Ring}}==
=={{header|Ring}}==
<lang ring>
<syntaxhighlight lang="ring">
# Project : FAST format
# Project : FAST format


Line 1,744: Line 2,095:
i = i + 1
i = i + 1
end
end
</syntaxhighlight>
</lang>
Output:
Output:
<pre>
<pre>
Line 1,752: Line 2,103:


=={{header|Ruby}}==
=={{header|Ruby}}==
<lang ruby>def fasta_format(strings)
<syntaxhighlight lang="ruby">def fasta_format(strings)
out, text = [], ""
out, text = [], ""
strings.split("\n").each do |line|
strings.split("\n").each do |line|
Line 1,774: Line 2,125:
EOS
EOS


puts fasta_format(data)</lang>
puts fasta_format(data)</syntaxhighlight>


{{out}}
{{out}}
Line 1,783: Line 2,134:


=={{header|Run BASIC}}==
=={{header|Run BASIC}}==
<lang runbasic>a$ = ">Rosetta_Example_1
<syntaxhighlight lang="runbasic">a$ = ">Rosetta_Example_1
THERECANBENOSPACE
THERECANBENOSPACE
>Rosetta_Example_2
>Rosetta_Example_2
Line 1,800: Line 2,151:
end if
end if
i = i + 1
i = i + 1
wend</lang>
wend</syntaxhighlight>
{{out}}
{{out}}
<pre>>Rosetta_Example_1: THERECANBENOSPACE
<pre>>Rosetta_Example_1: THERECANBENOSPACE
Line 1,809: Line 2,160:
This example is implemented using an [https://doc.rust-lang.org/book/iterators.html iterator] to reduce memory requirements and encourage code reuse.
This example is implemented using an [https://doc.rust-lang.org/book/iterators.html iterator] to reduce memory requirements and encourage code reuse.


<lang rust>
<syntaxhighlight lang="rust">
use std::env;
use std::env;
use std::io::{BufReader, Lines};
use std::io::{BufReader, Lines};
Line 1,873: Line 2,224:
}
}
}
}
</syntaxhighlight>
</lang>
{{out}}
{{out}}
<pre>Rosetta_Example_1: THERECANBENOSPACE
<pre>Rosetta_Example_1: THERECANBENOSPACE
Line 1,879: Line 2,230:


=={{header|Scala}}==
=={{header|Scala}}==
<lang scala>import java.io.File
<syntaxhighlight lang="scala">import java.io.File
import java.util.Scanner
import java.util.Scanner


Line 1,897: Line 2,248:


println("~~~+~~~")
println("~~~+~~~")
}</lang>
}</syntaxhighlight>


=={{header|Scheme}}==
=={{header|Scheme}}==
<lang scheme>(import (scheme base)
<syntaxhighlight lang="scheme">(import (scheme base)
(scheme file)
(scheme file)
(scheme write))
(scheme write))
Line 1,915: Line 2,266:
(display (string-copy line 1)) (display ": "))
(display (string-copy line 1)) (display ": "))
(else ; display the string directly
(else ; display the string directly
(display line))))))</lang>
(display line))))))</syntaxhighlight>
{{out}}
{{out}}
<pre>Rosetta_Example_1: THERECANBENOSPACE
<pre>Rosetta_Example_1: THERECANBENOSPACE
Line 1,921: Line 2,272:


=={{header|Seed7}}==
=={{header|Seed7}}==
<lang seed7>$ include "seed7_05.s7i";
<syntaxhighlight lang="seed7">$ include "seed7_05.s7i";


const proc: main is func
const proc: main is func
Line 1,947: Line 2,298:
end if;
end if;
writeln;
writeln;
end func;</lang>
end func;</syntaxhighlight>


{{out}}
{{out}}
Line 1,957: Line 2,308:
=={{header|Sidef}}==
=={{header|Sidef}}==
{{trans|Ruby}}
{{trans|Ruby}}
<lang ruby>func fasta_format(strings) {
<syntaxhighlight lang="ruby">func fasta_format(strings) {
var out = []
var out = []
var text = ''
var text = ''
Line 1,981: Line 2,332:
THERECANBESEVERAL
THERECANBESEVERAL
LINESBUTTHEYALLMUST
LINESBUTTHEYALLMUST
BECONCATENATED</lang>
BECONCATENATED</syntaxhighlight>
{{out}}
<pre>
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED
</pre>

=={{header|Smalltalk}}==
Works with Pharo Smalltalk
<syntaxhighlight lang="smalltalk">
FileLocator home / aFilename readStreamDo: [ :stream |
[ stream atEnd ] whileFalse: [
| line |
((line := stream nextLine) beginsWith: '>')
ifTrue: [
Transcript
cr;
show: (line copyFrom: 2 to: line size);
show: ': ' ]
ifFalse: [ Transcript show: line ] ] ]
</syntaxhighlight>
{{out}}
{{out}}
<pre>
<pre>
Line 1,989: Line 2,360:


=={{header|Tcl}}==
=={{header|Tcl}}==
<lang tcl>proc fastaReader {filename} {
<syntaxhighlight lang="tcl">proc fastaReader {filename} {
set f [open $filename]
set f [open $filename]
set sep ""
set sep ""
Line 2,004: Line 2,375:
}
}


fastaReader ./rosettacode.fas</lang>
fastaReader ./rosettacode.fas</syntaxhighlight>
{{out}}
{{out}}
<pre>
<pre>
Line 2,013: Line 2,384:
=={{header|TMG}}==
=={{header|TMG}}==
Unix TMG: <!-- C port of TMG processes 1.04 GB FASTA file in 38 seconds on a generic laptop -->
Unix TMG: <!-- C port of TMG processes 1.04 GB FASTA file in 38 seconds on a generic laptop -->
<lang UnixTMG>prog: ignore(spaces)
<syntaxhighlight lang="unixtmg">prog: ignore(spaces)
loop: parse(line)\loop parse(( = {*} ));
loop: parse(line)\loop parse(( = {*} ));
line: ( name | * = {} | seqns );
line: ( name | * = {} | seqns );
Line 2,026: Line 2,397:
spaces: << >>;
spaces: << >>;


f: 1;</lang>
f: 1;</syntaxhighlight>


=={{header|uBasic/4tH}}==
=={{header|uBasic/4tH}}==
<lang>If Cmd (0) < 2 Then Print "Usage: fasta <fasta file>" : End
<syntaxhighlight lang="text">If Cmd (0) < 2 Then Print "Usage: fasta <fasta file>" : End
If Set(a, Open (Cmd(2), "r")) < 0 Then Print "Cannot open \q";Cmd(2);"\q" : End
If Set(a, Open (Cmd(2), "r")) < 0 Then Print "Cannot open \q";Cmd(2);"\q" : End


Line 2,050: Line 2,421:
Local (4)
Local (4)


b@ = Dup("") ' start with an empty string
b@ := "" ' start with an empty string


Do
Do
Line 2,060: Line 2,431:
Loop ' if not add the line to current string
Loop ' if not add the line to current string


Return (b@) ' return the string</lang>
Return (b@) ' return the string</syntaxhighlight>
{{out}}
{{out}}
<pre>Rosetta_Example_1: THERECANBENOSPACE
<pre>Rosetta_Example_1: THERECANBENOSPACE
Line 2,066: Line 2,437:


0 OK, 0:431 </pre>
0 OK, 0:431 </pre>

=={{header|V (Vlang)}}==
<syntaxhighlight lang="Vlang">
const data = (
">Rosetta_Example_1
THERECANBENOSPACE
>Rosetta_Example_2
THERECANBESEVERAL
LINESBUTTHEYALLMUST
BECONCATENATED"
)

// main prints every FASTA record found in `data` as "<header>: <sequence>".
// Any line starting with '>' is treated as a header, whatever its name or
// length; all other lines are appended to the current sequence.
fn main() {
    for line in data.split_into_lines() {
        if line.starts_with(">") {
            // a header starts a new output line; the leading '>' is kept
            print("\n" + line.trim_space() + ": ")
        } else {
            // sequence data: emit only printable, non-blank bytes
            for c in line {
                if c > 0x20 {print(c.ascii_str())}
            }
        }
    }
}
</syntaxhighlight>

{{out}}
<pre>
>Rosetta_Example_1: THERECANBENOSPACE
>Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED
</pre>

=={{header|Wren}}==
=={{header|Wren}}==
{{trans|Kotlin}}
{{trans|Kotlin}}
More or less.
More or less.
<lang ecmascript>import "io" for File
<syntaxhighlight lang="wren">import "io" for File


var checkNoSpaces = Fn.new { |s| !s.contains(" ") && !s.contains("\t") }
var checkNoSpaces = Fn.new { |s| !s.contains(" ") && !s.contains("\t") }
Line 2,108: Line 2,512:
}
}
}
}
}</lang>
}</syntaxhighlight>


{{out}}
{{out}}
Line 2,117: Line 2,521:


=={{header|XPL0}}==
=={{header|XPL0}}==
<lang XPL0>proc Echo; \Echo line of characters from file to screen
<syntaxhighlight lang="xpl0">proc Echo; \Echo line of characters from file to screen
int Ch;
int Ch;
def LF=$0A, EOF=$1A;
def LF=$0A, EOF=$1A;
Line 2,139: Line 2,543:
Echo;
Echo;
];
];
]</lang>
]</syntaxhighlight>


{{out}}
{{out}}
Line 2,149: Line 2,553:


=={{header|zkl}}==
=={{header|zkl}}==
<lang zkl>fcn fasta(data){ // a lazy cruise through a FASTA file
<syntaxhighlight lang="zkl">fcn fasta(data){ // a lazy cruise through a FASTA file
fcn(w){ // one string at a time, -->False garbage at front of file
fcn(w){ // one string at a time, -->False garbage at front of file
line:=w.next().strip();
line:=w.next().strip();
Line 2,156: Line 2,560:
})
})
}.fp(data.walker()) : Utils.Helpers.wap(_);
}.fp(data.walker()) : Utils.Helpers.wap(_);
}</lang>
}</syntaxhighlight>
*This assumes that white space at front or end of string is extraneous (excepting ">" lines).
*This assumes that white space at front or end of string is extraneous (excepting ">" lines).
*Lazy, works for objects that support iterating over lines (ie most).
*Lazy, works for objects that support iterating over lines (ie most).
*The fasta function returns an iterator that wraps a function taking an iterator. Uh, yeah. An initial iterator (Walker) is used to get lines, hold state and do push back when read the start of the next string. The function sucks up one string (using the iterator). The wrapping iterator (wap) traps the exception when the function waltzes off the end of the data and provides API for foreach (etc).
*The fasta function returns an iterator that wraps a function taking an iterator. Uh, yeah. An initial iterator (Walker) is used to get lines, hold state and do push back when read the start of the next string. The function sucks up one string (using the iterator). The wrapping iterator (wap) traps the exception when the function waltzes off the end of the data and provides API for foreach (etc).
FASTA file:
FASTA file:
<lang zkl>foreach l in (fasta(File("fasta.txt"))) { println(l) }</lang>
<syntaxhighlight lang="zkl">foreach l in (fasta(File("fasta.txt"))) { println(l) }</syntaxhighlight>
FASTA data blob:
FASTA data blob:
<lang zkl>data:=Data(0,String,
<syntaxhighlight lang="zkl">data:=Data(0,String,
">Rosetta_Example_1\nTHERECANBENOSPACE\n"
">Rosetta_Example_1\nTHERECANBENOSPACE\n"
">Rosetta_Example_2\nTHERECANBESEVERAL\nLINESBUTTHEYALLMUST\n"
">Rosetta_Example_2\nTHERECANBESEVERAL\nLINESBUTTHEYALLMUST\n"
"BECONCATENATED");
"BECONCATENATED");
foreach l in (fasta(data)) { println(l) }</lang>
foreach l in (fasta(data)) { println(l) }</syntaxhighlight>
{{out}}
{{out}}
<pre>
<pre>

Latest revision as of 08:53, 4 March 2024

Task
FASTA format
You are encouraged to solve this task according to the task description, using any language you may know.

In bioinformatics, long character strings are often encoded in a format called FASTA.

A FASTA file can contain several strings, each identified by a name marked by a > (greater than) character at the beginning of the line.


Task

Write a program that reads a FASTA file such as:

>Rosetta_Example_1
THERECANBENOSPACE
>Rosetta_Example_2
THERECANBESEVERAL
LINESBUTTHEYALLMUST
BECONCATENATED
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Note that a high-quality implementation will not hold the entire file in memory at once; real FASTA files can be multiple gigabytes in size.

11l

Translation of: Python
V FASTA =
|‘>Rosetta_Example_1
  THERECANBENOSPACE
  >Rosetta_Example_2
  THERECANBESEVERAL
  LINESBUTTHEYALLMUST
  BECONCATENATED’

F fasta_parse(infile_str)
   V key = ‘’
   V val = ‘’
   [(String, String)] r
   L(line) infile_str.split("\n")
      I line.starts_with(‘>’)
         I key != ‘’
            r [+]= (key, val)
         key = line[1..].split_py()[0]
         val = ‘’
      E I key != ‘’
         val ‘’= line
   I key != ‘’
      r [+]= (key, val)
   R r

print(fasta_parse(FASTA).map((key, val) -> ‘#.: #.’.format(key, val)).join("\n"))
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Action!

In the following solution the input file fasta.txt is loaded from the H6 drive. The Altirra emulator automatically converts the ASCII CR/LF sequence into character 155 of the ATASCII charset used by Atari 8-bit computers when one of the H6-H10 hard drives is used under DOS 2.5.

PROC ReadFastaFile(CHAR ARRAY fname)
  CHAR ARRAY line(256)
  CHAR ARRAY tmp(256)
  BYTE newLine,dev=[1]

  newLine=0
  Close(dev)
  Open(dev,fname,4)
  WHILE Eof(dev)=0
  DO
    InputSD(dev,line)
    IF line(0)>0 AND line(1)='> THEN
      IF newLine THEN
        PutE()
      FI
      newLine=1
      SCopyS(tmp,line,2,line(0))
      Print(tmp) Print(": ")
    ELSE
      Print(line)
    FI
  OD
  Close(dev)
RETURN

PROC Main()
  CHAR ARRAY fname="H6:FASTA.TXT"

  ReadFastaFile(fname)
RETURN
Output:

Screenshot from Atari 8-bit computer

Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATE

Ada

The simple solution just reads the file (from standard input) line by line and directly writes it to the standard output.

with Ada.Text_IO; use Ada.Text_IO;

procedure Simple_FASTA is
   
   Current: Character;
   
begin 
   Get(Current); 
   if Current /= '>' then
      raise Constraint_Error with "'>' expected";
   end if;
   while not End_Of_File loop -- read name and string
      Put(Get_Line & ": "); -- read name and write directly to output
      Read_String:
      loop
	 exit Read_String when End_Of_File; -- end of input
	 Get(Current);
	 if Current = '>' then -- next name
	    New_Line; 
	    exit Read_String;
	 else
	    Put(Current & Get_Line); 
	    -- read part of string and write directly to output
	 end if;
      end loop Read_String;
   end loop;

end Simple_FASTA;
Output:
./simple_fasta < test.txt 
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED


This is a boringly simple text transformation.

The following more complex solution reads the entire file into a map and then prints the data stored in the map. The output is exactly the same as for the simple text transformation. "Note that a high-quality implementation will not hold the entire file in memory at once; real FASTA files can be multiple gigabytes in size." When processing FASTA files, one may use the input step by step to update an internal data structure and, at the end, to output the answer to a given question. For the task at hand the required output is about the same as the input, thus we store the entire input. For another task, we would not store the entire file. If the task were, e.g., to count the number of characters for each string, we would store (name, number) pairs in our data structure.


with Ada.Text_IO, Ada.Containers.Indefinite_Ordered_Maps; use Ada.Text_IO;

procedure FASTA is
   package Maps is new  Ada.Containers.Indefinite_Ordered_Maps
     (Element_Type => String, Key_Type => String);
   Map: Maps.Map; -- Map holds the full file (as pairs of name and value)
   
   function Get_Value(Previous: String := "") return String is
      Current: Character;
   begin
      if End_Of_File then 
	 return Previous; -- file ends 
      else
	 Get(Current); -- read first character
	 if Current = '>' then -- ah, a new name begins
	    return Previous; -- the string read so far is the value
	 else -- the entire line is part of the value
	    return Get_Value(Previous & Current & Get_Line);
	 end if;
      end if;
   end Get_Value;
   
   procedure Print_Pair(Position: Maps.Cursor) is
   begin
      Put_Line(Maps.Key(Position) & ": " & Maps.Element(Position));
      -- Maps.Key(X) is the name and Maps.Element(X) is the value at X
   end Print_Pair;
   
   Skip_This: String := Get_Value; 
   -- consumes the entire file, until the first line starting with '>'.
   -- the string Skip_This should be empty, but we don't verify this

begin 
   while not End_Of_File loop -- read the file into Map
      declare
	 Name: String := Get_Line;
	   -- reads all characters in the line, except for the first ">"
	 Value: String := Get_Value;
      begin
	 Map.Insert(Key => Name, New_Item => Value);
	 -- adds the pair (Name, Value) to Map
      end;
   end loop;
   
   Map.Iterate(Process => Print_Pair'Access); -- print Map
end FASTA;

Aime

file f;
text n, s;

f.affix(argv(1));

while (f.line(s) ^ -1) {
    if (s[0] == '>') {
        o_(n, s, ": ");
        n = "\n";
    } else {
        o_(s);
    }
}

o_(n);
Output:
>Rosetta_Example_1: THERECANBENOSPACE
>Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

ALGOL 68

Translation of: ALGOL W
BEGIN # read FASTA format data from standard input and write the results to  #
      # standard output - only the ">" line start is handled                 #

    BOOL at eof := FALSE;
    on logical file end( stand in, ( REF FILE f )BOOL: at eof := TRUE );

    WHILE STRING line;
          read( ( line, newline ) );
          NOT at eof
    DO
        IF line /= "" THEN                                  # non-empty line #
            INT  start     := LWB line;
            BOOL is heading = line[ start ] = ">";  # check for heading line #
            IF is heading THEN
                print( ( newline ) );
                start +:= 1
            FI;
            print( ( line[ start : ] ) );
            IF is heading THEN print( ( ": " ) ) FI
        FI
    OD
END
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

ALGOL W

begin
    % reads FASTA format data from standard input and writes the results to standard output %
    % only handles the ">" line start                                                       %
    string(256) line;
    % allow the program to continue after reaching end-of-file %
    ENDFILE := EXCEPTION( false, 1, 0, false, "EOF" );
    % handle the input %
    readcard( line );
    while not XCPNOTED(ENDFILE) do begin
        % strings are fixed length in Algol W - find the index of the last non-blank character %
        integer len;
        len := 255;
        while len > 0 and line( len // 1 ) = " " do len := len - 1;
        % len = 0 means either a blank line or a line with one character at index 0 %
        if len > 0 or line( 0 // 1 ) not = " " then begin % non-empty line %
            integer pos;
            pos := 0;
            if line( 0 // 1 ) = ">" then begin % header line - start a new output line and skip the ">" %
                write();
                pos := 1;
            end if_header_line ;
            for cPos := pos until len do writeon( line( cPos // 1 ) );
            if line( 0 // 1 ) = ">" then writeon( ": " )
        end if_non_empty_line ;
        readcard( line );
    end while_not_eof
end.
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Arturo

parseFasta: function [data][
    result: #[]
    current: ø
    loop split.lines data 'line [
        if? `>` = first line [
            current: slice line 1 (size line)-1
            set result current ""
        ]
        else ->
            set result current (get result current)++line
    ]
    return result
]

text: {
    >Rosetta_Example_1
    THERECANBENOSPACE
    >Rosetta_Example_2
    THERECANBESEVERAL
    LINESBUTTHEYALLMUST
    BECONCATENATED    
}

inspect.muted parseFasta text
Output:
[ :dictionary
	Rosetta_Example_1  :	THERECANBENOSPACE :string
	Rosetta_Example_2  :	THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED :string
]

AutoHotkey

Data =
(
>Rosetta_Example_1
THERECANBENOSPACE
>Rosetta_Example_2
THERECANBESEVERAL
LINESBUTTHEYALLMUST
BECONCATENATED
)

Data := RegExReplace(RegExReplace(Data, ">\V+\K\v+", ": "), "\v+(?!>)")
Gui, add, Edit, w700,  % Data 
Gui, show
return
Output:
>Rosetta_Example_1: THERECANBENOSPACE
>Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

AWK

# syntax: GAWK -f FASTA_FORMAT.AWK filename
# stop processing each file when an error is encountered
{   if (FNR == 1) {
      header_found = 0
      if ($0 !~ /^[;>]/) {
        error("record is not valid")
        nextfile
      }
    }
    if ($0 ~ /^;/) { next } # comment begins with a ";"
    if ($0 ~ /^>/) { # header
      if (header_found > 0) {
        printf("\n") # EOL for previous sequence
      }
      printf("%s: ",substr($0,2))
      header_found = 1
      next
    }
    if ($0 ~ /[ \t]/) { next } # ignore records with whitespace
    if ($0 ~ /\*$/) { # sequence may end with an "*"
      if (header_found > 0) {
        printf("%s\n",substr($0,1,length($0)-1))
        header_found = 0
        next
      }
      else {
        error("end of sequence found but header is missing")
        nextfile
      }
    }
    if (header_found > 0) {
      printf("%s",$0)
    }
    else {
      error("header not found")
      nextfile
    }
}
ENDFILE {
    if (header_found > 0) {
      printf("\n")
    }
}
END {
    exit (errors == 0) ? 0 : 1
}
# Report a problem with the current record and count it in the global "errors",
# which the END rule turns into the exit status.
#   message - short description of what is wrong with the record
# The diagnostic goes to "/dev/stderr", which gawk handles itself on every
# platform, so it never ends up in the redirected result and no file named
# "con" is created on non-Windows systems.
function error(message) {
    printf("error: FILENAME=%s, FNR=%d, %s, %s\n",FILENAME,FNR,message,$0) >"/dev/stderr"
    errors++
    return
}
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

BASIC

QBasic

Works with: QBasic version 1.1
Works with: QuickBasic version 4.5
FUNCTION checkNoSpaces (s$)
    ' Returns 1 when s$ contains neither a space nor a tab, otherwise 0.
    ' The result starts at 1 and is only ever lowered, so a detected blank
    ' is not overwritten once the loop has finished.
    checkNoSpaces = 1
    ' Examine every character, including the last one.
    FOR i = 1 TO LEN(s$)
        IF MID$(s$, i, 1) = CHR$(32) OR MID$(s$, i, 1) = CHR$(9) THEN checkNoSpaces = 0
    NEXT i
END FUNCTION

' Print every record of "input.fasta" as "<name>: <sequence>", one per line.
OPEN "input.fasta" FOR INPUT AS #1

' first stays 1 until the first header line has been printed
first = 1

DO WHILE NOT EOF(1)
    LINE INPUT #1, ln$
    IF LEFT$(ln$, 1) = ">" THEN
        ' NOT is bitwise in QBasic (NOT 1 = -2, which counts as true), so the
        ' flag is compared explicitly to avoid a blank line before record one.
        IF first = 0 THEN PRINT
        PRINT MID$(ln$, 2); ": ";
        first = 0
    ELSEIF first <> 0 THEN
        PRINT : PRINT "Error : File does not begin with '>'"
        EXIT DO
    ELSE
        IF checkNoSpaces(ln$) THEN
            PRINT ln$;
        ELSE
            PRINT : PRINT "Error : Sequence contains space(s)"
            EXIT DO
        END IF
    END IF
LOOP
CLOSE #1

True BASIC

Translation of: QBasic
DEF EOF(f)
    IF END #f THEN LET EOF = -1 ELSE LET EOF = 0
END DEF

! Returns 1 when s$ contains neither a space nor a tab, otherwise 0.
FUNCTION checknospaces(s$)
    ! Start from "clean" and only ever lower the result, so a detected blank
    ! is not overwritten after the loop.
    LET checknospaces = 1
    FOR i = 1 TO LEN(s$)
        ! s$[a:b] selects characters a through b, so [i:i] is the single
        ! character at position i.
        IF (s$)[i:i] = CHR$(32) OR (s$)[i:i] = CHR$(9) THEN LET checknospaces = 0
    NEXT i
END FUNCTION

OPEN #1: NAME "m:\input.fasta", org text, ACCESS INPUT, create old

LET first = 1
DO WHILE (NOT EOF(1)<>0)
   LINE INPUT #1: ln$
   IF (ln$)[1:1] = ">" THEN
      IF (NOT first<>0) THEN PRINT
      PRINT (ln$)[2:maxnum]; ": ";
      IF first<>0 THEN LET first = 0
   ELSEIF first<>0 THEN
      PRINT "Error : File does not begin with '>'"
      EXIT DO
   ELSE
      IF checknospaces(ln$)<>0 THEN
         PRINT ln$;
      ELSE
         PRINT "Error : Sequence contains space(s)"
         EXIT DO
      END IF
   END IF
LOOP
CLOSE #1
END

BASIC256

open 1, "input.fasta"

first = True

while not eof(1)
    ln = readline(1)
    if left(ln, 1) = ">"  then
        if not first then print
        print mid(ln, 2, length(ln)-2) & ": ";
        if first then first = False
    else
        if first then
            print "Error : File does not begin with '>'"
            exit while
        else
            if checkNoSpaces(ln) then
                print left(ln, length(ln)-2);
            else
                print "Error : Sequence contains space(s)"
                exit while
            end if
        end if
    end if
end while
close 1
end

# Returns True when s contains neither a space (32) nor a tab (9).
# The last character is not examined, exactly as before
# (NOTE(review): presumably the line terminator kept by readline - confirm).
function checkNoSpaces(s)
    for i = 1 to length(s) - 1
        # asc() gives the character code; chr() goes the other way
        # (number -> character) and can never equal 32 or 9.
        if asc(mid(s,i,1)) = 32 or asc(mid(s,i,1)) = 9 then return False
    next i
    return True
end function

C

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Reads "fasta.txt" from the current directory and prints every record as
 * "<name>: <sequence>", one record per output line.
 *
 * Exits with EXIT_FAILURE when the file cannot be opened, EXIT_SUCCESS
 * otherwise.  Uses POSIX getline(), so lines may be arbitrarily long.
 */
int main(void)
{
	FILE * fp;
	char * line = NULL;
	size_t len = 0;
	ssize_t read;
	int state = 0;	/* becomes 1 once the first header has been printed */

	fp = fopen("fasta.txt", "r");
	if (fp == NULL)
		exit(EXIT_FAILURE);

	while ((read = getline(&line, &len, fp)) != -1) {
		/* Delete the trailing line terminator, "\n" as well as "\r\n" */
		while (read > 0 && (line[read - 1] == '\n' || line[read - 1] == '\r'))
			line[--read] = 0;
		/* Header lines start with '>' and open a new output line */
		if (line[0] == '>') {
			if (state == 1)
				printf("\n");
			printf("%s: ", line+1);
			state = 1;
		} else {
			/* Everything else is sequence data: append it as is */
			printf("%s", line);
		}
	}
	printf("\n");

	fclose(fp);
	free(line);	/* free(NULL) is a no-op, no guard needed */
	return EXIT_SUCCESS;
}
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

C#

using System;
using System.Collections.Generic;
using System.IO;
using System.Text;

/// <summary>
/// Reads "fasta.txt" and prints every record as "name: sequence".
/// </summary>
class Program
{
    /// <summary>One FASTA record: the header text and the joined sequence.</summary>
    public class FastaEntry
    {
        public string Name { get; set; }
        public StringBuilder Sequence { get; set; }
    }

    /// <summary>
    /// Lazily parses FASTA text from <paramref name="fastaFile"/>.
    /// Lines starting with ';' are skipped, a line starting with '>' opens a
    /// new record, and every other line is appended to the current record.
    /// Data that appears before the first header is ignored.
    /// </summary>
    /// <returns>The records in file order; nothing when no header exists.</returns>
    static IEnumerable<FastaEntry> ParseFasta(StreamReader fastaFile)
    {
        FastaEntry f = null;
        string line;
        while ((line = fastaFile.ReadLine()) != null)
        {
            // ignore comment lines
            if (line.StartsWith(";"))
                continue;

            if (line.StartsWith(">"))
            {
                if (f != null)
                    yield return f;
                f = new FastaEntry { Name = line.Substring(1), Sequence = new StringBuilder() };
            }
            else if (f != null)
                f.Sequence.Append(line);
        }
        // Only emit the trailing record when one was actually started;
        // yielding null here made callers fail on input without a header.
        if (f != null)
            yield return f;
    }

    static void Main(string[] args)
    {
        try
        {
            using (var fastaFile = new StreamReader("fasta.txt"))
            {
                foreach (FastaEntry f in ParseFasta(fastaFile))
                    Console.WriteLine("{0}: {1}", f.Name, f.Sequence);
            }
        }
        catch (FileNotFoundException e)
        {
            Console.WriteLine(e);
        }
        // Keep the console window open until Enter is pressed.
        Console.ReadLine();
    }
}

C++

#include <fstream>
#include <iostream>
#include <string>

// Prints every record of the FASTA file named by argv[1] as
// "<name> : <sequence>".
//
// An empty line or a new '>' header ends the current record.  A record whose
// sequence contains a space is discarded as invalid.
// Returns 0 on success and -1 when the argument is missing or the file
// cannot be opened.
int main( int argc, char **argv ){
    if( argc <= 1 ){
        std::cerr << "Usage: "<<argv[0]<<" [infile]" << std::endl;
        return -1;
    }

    std::ifstream input(argv[1]);
    if(!input.good()){
        std::cerr << "Error opening '"<<argv[1]<<"'. Bailing out." << std::endl;
        return -1;
    }

    std::string line, name, content;
    // Test the stream itself rather than good(): reading a final line that has
    // no trailing newline sets eofbit, and good() would then drop that line.
    while( std::getline( input, line ) ){
        if( line.empty() || line[0] == '>' ){ // Identifier marker
            if( !name.empty() ){ // Print out what we read from the last entry
                std::cout << name << " : " << content << std::endl;
                name.clear();
            }
            if( !line.empty() ){
                name = line.substr(1);
            }
            content.clear();
        } else if( !name.empty() ){
            if( line.find(' ') != std::string::npos ){ // Invalid sequence--no spaces allowed
                name.clear();
                content.clear();
            } else {
                content += line;
            }
        }
    }
    if( !name.empty() ){ // Print out what we read from the last entry
        std::cout << name << " : " << content << std::endl;
    }

    return 0;
}
Output:
Rosetta_Example_1 : THERECANBENOSPACE
Rosetta_Example_2 : THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Clojure

(defn fasta
  "Prints every record of the FASTA file at `pathname` as \"name: sequence\",
  one record per line. A line starting with > opens a new record; every other
  line is appended to the current output line. No blank line is printed before
  the first record, and the last record is terminated with a newline."
  [pathname]
  (with-open [r (clojure.java.io/reader pathname)]
    ;; The accumulator records whether a header has been printed yet, so the
    ;; separating newline goes between records instead of in front of each.
    (let [seen-header?
          (reduce (fn [seen? line]
                    (if (= (first line) \>)
                      (do (when seen? (newline))
                          (print (str (subs line 1) ": "))
                          true)
                      (do (print line)
                          seen?)))
                  false
                  (line-seq r))]
      (when seen-header? (newline)))))

Common Lisp

;; * The input file as a parameter
(defparameter *input* #p"fasta.txt"
              "The input file name.")

;; * Reading the data
;; Prints every record as "name: sequence", one record per line.
(with-open-file (data *input*)
  (loop
     :for line = (read-line data nil nil)
     :while line
     ;; A header line starts with #\>.  The length check comes first because
     ;; (char line 0) signals an error on an empty line.
     :if (and (plusp (length line))
              (char= #\> (char line 0)))
     ;; ~& starts a fresh line, so no blank line precedes the first record.
     :do (format t "~&~a: " (subseq line 1))
     :else
     :do (format t "~a" line))
  ;; Terminate the last record, but only if something was printed.
  (fresh-line))
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Crystal

To run the code below online, paste it into the playground.

# create tmp fasta file in /tmp/
tmpfile = "/tmp/tmp"+Random.rand.to_s+".fasta"
File.write(tmpfile, ">Rosetta_Example_1
THERECANBENOSPACE
>Rosetta_Example_2
THERECANBESEVERAL
LINESBUTTHEYALLMUST
BECONCATENATED")

# read tmp fasta file and store to hash
# (key: first word of the header without ">", value: concatenated sequence)
ref = tmpfile
id = seq = ""
fasta = {} of String => String
File.each_line(ref) do |line|
  if line.starts_with?(">")
    # gsub, not sub: every whitespace character has to go, not just the first
    fasta[id] = seq.gsub(/\s/, "") if id != ""
    id = line.split(/\s/)[0].lstrip(">")
    seq = ""
  else
    seq += line
  end
end
# store the last record, unless the file contained no header at all
fasta[id] = seq.gsub(/\s/, "") if id != ""

# the temporary file is no longer needed
File.delete(tmpfile)

# show fasta component
fasta.each { |k,v| puts "#{k}: #{v}"}
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Delphi

See Pascal.

EasyLang

# Reads lines until an empty one and prints each record as "name: sequence".
# stat turns 1 after the first header, so that only later headers start a
# new output line.
repeat
   s$ = input
   until s$ = ""
   if substr s$ 1 1 = ">"
      if stat = 1
         print ""
      .
      stat = 1
      # drop the leading ">" and stay on the same line for the sequence
      h$ = substr s$ 2 999
      write h$ & ": "
   else
      write s$
   .
. 
input_data
>Rosetta_Example_1
THERECANBENOSPACE
>Rosetta_Example_2
THERECANBESEVERAL
LINESBUTTHEYALLMUST
BECONCATENATED

F#

//FASTA format. Nigel Galloway: March 23rd., 2023.
// fN prints one line: a header (first character '>') goes on a fresh output line as
// "name:" followed by a line break, anything else is written without a line break.
// NOTE(review): g[0] is evaluated for every line, so an empty line will raise an exception.
let fN(g:string)=match g[0] with '>'->printfn "\n%s:" g[1..] |_->printf "%s" g
// Lazy sequence of the lines of testFASTA.txt; the file is opened each time the sequence is enumerated.
let lines=seq{use n=System.IO.File.OpenText("testFASTA.txt") in while not n.EndOfStream do yield n.ReadLine()}
// The first line is printed separately (so no blank line precedes it) and is assumed to be a header;
// the remaining lines go through fN, and a final line break ends the output.
printfn "%s:" ((Seq.head lines)[1..]); Seq.tail lines|>Seq.iter fN; printfn ""
Output:
Rosetta_Example_1:
THERECANBENOSPACE
Rosetta_Example_2:
THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Factor

USING: formatting io kernel sequences ;
IN: rosetta-code.fasta

! Prints one input line: a header (starting with ">") starts a new output
! line as "name: ", any other line is written as is, without a line break.
: process-fasta-line ( str -- )
    dup ">" head? [ rest "\n%s: " printf ] [ write ] if ;

! Reads standard input. The first line is taken to be a header and is printed
! without a preceding line break; every following line goes through
! process-fasta-line.
: main ( -- )
    readln rest "%s: " printf [ process-fasta-line ] each-line ;

MAIN: main
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Forth

Developed with gforth 0.7.9

1024      constant  max-Line
char >    constant  marker

\ read-lines ( fid -- fid )
\ Reads the file line by line into PAD.  A line starting with the marker is
\ printed on a new output line as "name : ", any other line is appended to
\ the current output line.
: read-lines        begin  pad max-line >r over r> swap   \ fid pad u fid
                           read-line throw                \ fid u2 flag
                    while  pad dup c@ marker =            \ fid u2 pad hdr?
                           \ skip the marker: one char further AND one char
                           \ fewer, otherwise TYPE reads past the line end
                           if cr 1+ swap 1- type ."  : "
                           else swap  type
                           then
                    repeat drop  ;

: Test              s" ./FASTA.txt" r/o open-file throw
                    read-lines
                    close-file throw
                    cr ;
Test
Output:
Rosetta_Example_1 : THERECANBENOSPACE
Rosetta_Example_2 : THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

FreeBASIC

This program sticks to the task as described in the heading and doesn't allow for any of the (apparently) obsolete practices described in the Wikipedia article :

' FB 1.05.0 Win64

'' Returns True when s contains neither a space nor a tab.
'' An empty string contains none and therefore yields True.
Function checkNoSpaces(s As String) As Boolean
  '' The counter is a signed Integer: with UInteger the bound Len(s) - 1 wraps
  '' around to a huge value for an empty string and the loop reads far past it.
  For i As Integer = 0 To Len(s) - 1
    If s[i] = 32 OrElse s[i] = 9 Then Return False  '' check for spaces or tabs
  Next
  Return True
End Function
    
Open "input.fasta" For Input As # 1

Dim As String ln, seq
Dim first As Boolean = True

While Not Eof(1)
  Line Input #1, ln
  If Left(ln, 1) = ">"  Then
    If Not first Then Print 
    Print Mid(ln, 2); ": ";       
    If first Then first = False
  ElseIf first Then
    Print: Print "Error : File does not begin with '>'";
    Exit While
  Else
    If checkNoSpaces(ln) Then
      Print ln;
    Else
      Print : Print "Error : Sequence contains space(s)";
      Exit While
    End If
  End If
Wend

Close #1

Print : Print
Print "Press any key to quit"
Sleep
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Gambas

' Loads the file "../FASTA" and prints every record as "name: sequence".
Public Sub Main()
Dim sList As String = File.Load("../FASTA")
Dim sTemp, sOutput As String

' sOutput collects the current record; it is printed when the next header
' arrives and once more after the loop for the last record.
For Each sTemp In Split(sList, gb.NewLine)
  If sTemp Begins ">" Then
    If sOutput Then Print sOutput
    ' Right(..., -1) keeps everything except the first character (the ">")
    sOutput = Right(sTemp, -1) & ": "
  Else
    sOutput &= sTemp
  Endif
Next

Print sOutput

End

Output:

Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Go

package main

import (
        "bufio"
        "fmt"
        "os"
)

// main prints every record of "rc.fasta" as "<name>: <sequence>", one per
// line. Blank lines are skipped; sequence data that appears before any header
// is reported as "missing header" and ends the program.
func main() {
        file, err := os.Open("rc.fasta")
        if err != nil {
                fmt.Println(err)
                return
        }
        defer file.Close()

        scanner := bufio.NewScanner(file)
        seenHeader := false
        for scanner.Scan() {
                text := scanner.Text()
                if text == "" {
                        continue
                }
                if text[0] == '>' {
                        // Every header except the first ends the previous record.
                        if seenHeader {
                                fmt.Println()
                        }
                        fmt.Printf("%s: ", text[1:])
                        seenHeader = true
                        continue
                }
                // Sequence data is only valid once a header has been seen.
                if !seenHeader {
                        fmt.Println("missing header")
                        return
                }
                fmt.Print(text)
        }
        if seenHeader {
                fmt.Println()
        }
        if scanErr := scanner.Err(); scanErr != nil {
                fmt.Println(scanErr)
        }
}
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Haskell

We pass the file path as an argument to the parseFasta function, which only does the file loading and result printing.

The first way

We parse FASTA by hand (generally not a recommended approach). We use the fact that groupBy walks the list from the head and groups the items by a predicate; here we first concatenate all the fasta strings and then pair those with each respective name.

import Data.List ( groupBy )

-- | Loads the FASTA file at the given path and prints every record as
-- "name: sequence", one record per line.
parseFasta :: FilePath -> IO ()
parseFasta fileName = do
  contents <- readFile fileName
  mapM_ putStrLn
    [ name ++ ": " ++ code | (name, code) <- readFasta (lines contents) ]

-- | Turns the lines of a FASTA file into (name, sequence) pairs.
--
-- Consecutive non-header lines are concatenated into one sequence. Lines that
-- appear before the first header are dropped, empty lines are tolerated, and
-- a header without any sequence lines yields an empty sequence.
readFasta :: [String] -> [(String, String)]
readFasta =
  pair . dropWhile notName . map concat . groupBy (\x y -> notName x && notName y)
 where
  -- Pattern matching instead of 'head', which fails on an empty line.
  notName :: String -> Bool
  notName ('>' : _) = False
  notName _         = True

  -- Invariant: the first element, if any, is a header line.
  pair :: [String] -> [(String, String)]
  pair (x : y : xs)
    | notName y = (drop 1 x, y) : pair xs
    | otherwise = (drop 1 x, "") : pair (y : xs)  -- header directly followed by a header
  pair [x] = [(drop 1 x, "")]                     -- trailing header without a sequence
  pair []  = []
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

The second way

We parse FASTA using parser combinators. Normally you'd use something like Trifecta or Parsec, but here we use ReadP, because it is simple and also included in ghc by default. With other parsing libraries the code would be almost the same.

import Text.ParserCombinators.ReadP
import Control.Applicative ( (<|>) )
import Data.Char ( isAlpha, isAlphaNum )

-- | Loads the FASTA file at the given path, parses it with 'readFasta' and
-- prints every record as "name: sequence".
--
-- Raises an 'IOError' with a descriptive message when the file does not match
-- the grammar, rather than failing inside 'last' on an empty result list.
parseFasta :: FilePath -> IO ()
parseFasta fileName = do
  file <- readFile fileName
  case readP_to_S readFasta file of
    []      -> ioError . userError $ fileName ++ ": not a valid FASTA file"
    -- 'last' picks the parse that 'readP_to_S' returns last, as before.
    results -> mapM_ (\(name, code) -> putStrLn $ name ++ ": " ++ code)
                     (fst (last results))


readFasta :: ReadP [(String, String)]
readFasta = many pair <* eof
 where
  pair    = (,) <$> name <*> code
  name    = char '>' *> many (satisfy isAlphaNum <|> char '_') <* newline
  code    = concat <$> many (many (satisfy isAlpha) <* newline)
  newline = char '\n'
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

J

Needs chunking to handle huge files.

require 'strings'  NB. not needed for J versions greater than 6.
parseFasta=: ((': ' ,~ LF&taketo) , (LF -.~ LF&takeafter));._1

Example Usage

   Fafile=: noun define
>Rosetta_Example_1
THERECANBENOSPACE
>Rosetta_Example_2
THERECANBESEVERAL
LINESBUTTHEYALLMUST
BECONCATENATED
)
   parseFasta Fafile
Rosetta_Example_1: THERECANBENOSPACE                                 
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Nowadays, most machines have gigabytes of memory. However, if it's necessary to process FASTA content on a system with inadequate memory we can use files to hold intermediate results. For example:

bs=: 2
chunkFasta=: {{
  r=. EMPTY
  bad=. a.-.a.{~;48 65 97(+i.)each 10 26 26
  dir=. x,'/'
  off=. 0
  siz=. fsize y
  block=. dest=. ''
  while. off < siz do.
    block=. block,fread y;off([, [ -~ siz<.+)bs
    off=. off+bs
    while. LF e. block do.
      line=. LF taketo block
      select. {.line
        case. ';' do. 
        case. '>' do. 
          start=. }.line-.CR
          r=.r,(head=. name,'.head');<name=. dir,start -. bad
          start fwrite head
          '' fwrite name
        case. do.
          (line-.bad) fappend name
      end.
      block=. LF takeafter block
    end.
  end.
  r
}}

Here, we're using a block size of 2 bytes, to illustrate correctness. If speed matters, we should use something significantly larger.

The left argument to chunkFasta names the directory used to hold content extracted from the FASTA file. The right argument names that FASTA file. The result identifies the extracted headers and contents.

Thus, if '~/fasta.txt' contains the example file for this task and we want to store intermediate results in the '~temp' directory, we could use:

   fasta=: '~temp' chunkFasta '~/fasta.txt'

And, to complete the task:

   ;(,': ',,&LF)each/"1 fread each fasta
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Java

This implementation presumes the data-file is well-formed

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public static void main(String[] args) throws IOException {
    List<FASTA> fastas = readFile("fastas.txt");
    for (FASTA fasta : fastas)
        System.out.println(fasta);
}

/**
 * Reads all FASTA records from the file at {@code path}.
 * <p>
 * A line starting with '>' opens a new record; every following line is
 * appended to it. Lines that appear before the first header are ignored,
 * and an empty or header-less file yields an empty list.
 *
 * @param path location of the FASTA file
 * @return the records in file order, never {@code null}
 * @throws IOException if the file cannot be opened or read
 */
static List<FASTA> readFile(String path) throws IOException {
    try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
        List<FASTA> list = new ArrayList<>();
        /* text of the record being collected; null until the first header */
        StringBuilder lines = null;
        String newline = System.lineSeparator();
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.startsWith(">")) {
                if (lines != null)
                    list.add(parseFASTA(lines.toString()));
                lines = new StringBuilder();
                /* the header keeps its line break so parseFASTA can split on it */
                lines.append(line).append(newline);
            } else if (lines != null) {
                lines.append(line);
            }
        }
        /* nothing to flush when the file held no header at all */
        if (lines != null)
            list.add(parseFASTA(lines.toString()));
        return list;
    }
}

static FASTA parseFASTA(String string) {
    String description;
    char[] sequence;
    int indexOf = string.indexOf(System.lineSeparator());
    description = string.substring(1, indexOf);
    /* using 'stripLeading' will remove any additional line-separators */
    sequence = string.substring(indexOf + 1).stripLeading().toCharArray();
    return new FASTA(description, sequence);
}

/* using a 'char' array seems more logical */
record FASTA(String description, char[] sequence) {
    @Override
    public String toString() {
        return "%s: %s".formatted(description, new String(sequence));
    }
}
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED


An alternate demonstration

Translation of: D
Works with: Java version 7
import java.io.*;
import java.util.Scanner;

/**
 * Prints every record of "test.fasta" as "name: sequence", one per line.
 */
public class ReadFastaFile {

    /**
     * @param args unused
     * @throws FileNotFoundException if "test.fasta" does not exist
     */
    public static void main(String[] args) throws FileNotFoundException {

        /* true until the first header has been printed */
        boolean first = true;

        try (Scanner sc = new Scanner(new File("test.fasta"))) {
            while (sc.hasNextLine()) {
                String line = sc.nextLine().trim();
                /* blank lines carry no data; charAt(0) would throw on them */
                if (line.isEmpty())
                    continue;
                if (line.charAt(0) == '>') {
                    if (first)
                        first = false;
                    else
                        System.out.println();
                    System.out.printf("%s: ", line.substring(1));
                } else {
                    System.out.print(line);
                }
            }
        }
        System.out.println();
    }
}
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED
Rosetta_Example_3: THISISFASTA

JavaScript

The code below uses Nodejs to read the file.

const fs = require("fs");
const readline = require("readline");
 
const args = process.argv.slice(2);
if (!args.length) {
    console.error("must supply file name");
    process.exit(1);
}
 
const fname = args[0];
 
// Open the file as a stream and report read failures (missing file, no
// permission, ...) with a short message instead of an uncaught exception.
const input = fs.createReadStream(fname);
input.on("error", (err) => {
    console.error(err.message);
    process.exit(1);
});

const readInterface = readline.createInterface({
    input,
    // Always treat "\r\n" as ONE line break, even when the two characters
    // arrive in different chunks.
    crlfDelay: Infinity,
});

// Printed before every header; empty for the first one so that the output
// does not start with a blank line.
let sep = "";
readInterface.on("line", (line) => {
    if (line.startsWith(">")) {
        process.stdout.write(sep);
        sep = "\n";
        process.stdout.write(line.substring(1) + ": ");
    } else {
        process.stdout.write(line);
    }
});

// Terminate the last record once the input is exhausted.
readInterface.on("close", () => process.stdout.write("\n"));
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

jq

Works with: jq version 1.5rc1

The following implementation uses "foreach" and "inputs" so that very large input files can be processed with minimal space requirements: in each cycle, only as many lines are read as are required to compose an output line.
Notice that an additional ">" must be provided to "foreach" to ensure the final block of lines of the input file are properly assembled.

# Emits one "name: sequence" string per FASTA record read via `inputs`.
# The extra ">" after `inputs` acts as a final header, so that the last
# record is flushed as well.
def fasta:
  foreach (inputs, ">") as $line
    # state: [accumulator, print ]
    #   accumulator - the output line built so far
    #   print       - a finished output line to emit in this step, else null/false
    ( [null, null];
      # header: start a new accumulator and hand the previous one over for printing
      if $line[0:1] == ">" then [($line[1:] + ": "), .[0]]
      # sequence line: extend the accumulator, nothing to print yet
      else [ (.[0] + $line), false]
      end;
      # emit only when a finished line is available (null/false emit nothing)
      if .[1] then .[1] else empty end )
    ;

fasta
Output:
$ jq -n -R -r -f FASTA_format.jq < FASTA_format.fasta
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Julia

Works with: Julia version 0.6
# Print every record of data/fasta.txt as "name: sequence", one per line.
# Output goes through plain `print`, which targets standard output on Julia 0.6
# (`STDOUT`) and on Julia 1.x (`stdout`) alike.
# The `let` block keeps the flag local, so the loop may update it in a script.
let seen_header = false
    for line in eachline("data/fasta.txt")
        if startswith(line, '>')
            # separate records, but do not start the output with a blank line
            seen_header && println()
            print(line[2:end], ": ")
            seen_header = true
        else
            print(line)
        end
    end
    # terminate the last record
    seen_header && println()
end

Kotlin

Translation of: FreeBASIC
// version 1.1.2

import java.util.Scanner
import java.io.File

/** Returns true when [s] contains neither a space nor a tab character. */
fun checkNoSpaces(s: String): Boolean = s.none { it == ' ' || it == '\t' }

/**
 * Prints every record of "input.fasta" as "name: sequence".
 *
 * Stops with an error message when the file does not begin with a header or
 * when a sequence line contains a space or tab. Empty lines are skipped.
 */
fun main(args: Array<String>) {
    var first = true   // true until the first header has been printed
    val sc = Scanner(File("input.fasta"))
    try {
        while (sc.hasNextLine()) {
            val line = sc.nextLine()
            // line[0] would throw on an empty line
            if (line.isEmpty()) continue
            if (line[0] == '>') {
                if (!first) println()
                print("${line.substring(1)}: ")
                if (first) first = false
            }
            else if (first) {
                println("Error : File does not begin with '>'")
                break
            }
            else if (checkNoSpaces(line))
                print(line)
            else {
                println("\nError : Sequence contains space(s)")
                break
            }
        }
    }
    finally {
        // release the file even when reading fails part-way
        sc.close()
    }
}
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Lua

-- Reads "input.txt" and prints every FASTA record as "name: sequence",
-- in the order in which the names first appear in the file.
local file = assert(io.open("input.txt","r"))	-- fail with the OS message if missing
local data = file:read("*a")
file:close()

local output = {}	-- name -> concatenated sequence
local order = {}	-- names in order of first appearance
local key = nil

-- the line pattern below needs a terminator, so make sure the last line has one
if data:sub(-1) ~= "\n" then
	data = data .. "\n"
end

-- iterate through lines
for line in data:gmatch("(.-)\r?\n") do
	if line:match("%s") then
		error("line contained space")
	elseif line:sub(1,1) == ">" then
		key = line:sub(2)
		-- if key already exists, append to the previous input
		if output[key] == nil then
			output[key] = ""
			order[#order + 1] = key
		end
	elseif key ~= nil then
		output[key] = output[key] .. line
	end
end

-- print result (pairs() has no defined order, hence the separate list)
for _,k in ipairs(order) do
	print(k..": "..output[k])
end
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

M2000 Interpreter

Spaghetti code, using Goto, but it works by partially reading an input stream, where the size of each read is not known in advance (as if the data were being transmitted). We make an object, FASTA_MACHINE, and run it. The object produces events, so we provide some functions as services. These functions are called like subs, but we have to use New if we want to shadow any variable with the same name (subs always include the New, as in Read New, so we don't use it there). If there are no module variables with the same names as the arguments of these functions, then we can omit New. All these functions have the same scope as the module they belong to.

We can use ";" for comments, ">" for title. We can input one char, or many, in each input packet. Linefeed by default is CRLF. Whitespaces are spaces, nbsp, and tabs.


Module CheckIt {
      Class FASTA_MACHINE {
            Events "GetBuffer", "header", "DataLine", "Quit"
      Public:
            \\ Run: state machine that consumes the input packet by packet.
            \\ State 1/5 looks for the start of a header (">") or comment (";"),
            \\ state 2 waits for the end of that line, state 3 handles the first
            \\ data line and state 4 the following data lines.
            \\ Events raised: "Quit" and "GetBuffer" before each packet, "header"
            \\ for a complete title, "DataLine" for each sequence line.
            Module Run {
                  Const lineFeed$=chr$(13)+chr$(10)
                  Const WhiteSpace$=" "+chr$(9)+chrcode$(160)
                  Def long state=1, idstate=1
                  Def boolean Quit=False
                  Def Buf$, waste$, Packet$
            GetNextPacket:
                        Call Event "Quit", &Quit
                        If Quit then exit
                        Call Event "GetBuffer", &Packet$
                        Buf$+=Packet$
                        If len(Buf$)=0 Then exit
                        On State Goto GetStartIdentifier, GetIdentifier, GetStartData, GetData, GetStartIdentifier2
                        exit
            GetStartIdentifier:
                        waste$=rightpart$(Buf$, ">")
            GetStartIdentifier2:
                        \\ idstate=2 marks the line as a comment
                        If len(waste$)=0 Then waste$=rightpart$(Buf$, ";") : idstate=2
                        If len(waste$)=0 Then idstate=1 : Goto GetNextPacket ' we have to read more
                        buf$=waste$
                        state=2            
            GetIdentifier:
                        If Len(Buf$)=len(lineFeed$) then {
                              if buf$<>lineFeed$ then Goto GetNextPacket 
                              waste$=""
                        } Else {
                              if instr(buf$, lineFeed$)=0 then Goto GetNextPacket
                              waste$=rightpart$(Buf$, lineFeed$)
                          }
                        If idstate=2 Then {
                            idstate=1
                            \\ it's a comment, drop it
                            state=1
                            Goto GetNextPacket
                        } Else Call Event "header", filter$(leftpart$(Buf$,lineFeed$), WhiteSpace$)
                        Buf$=waste$
                        State=3
            GetStartData:
                        while left$(buf$, 2)=lineFeed$ {buf$=Mid$(buf$,3)}
                        waste$=Leftpart$(Buf$, lineFeed$)
                        If len(waste$)=0 Then  Goto GetNextPacket ' we have to read more
                        waste$=Filter$(waste$,WhiteSpace$)
                        \\ pass the filtered line, exactly as GetData does below
                        Call Event "DataLine", waste$
                        Buf$=Rightpart$(Buf$,lineFeed$)
                        state=4
            GetData:
                        while left$(buf$, 2)=lineFeed$ {buf$=Mid$(buf$,3)}
                        waste$=Leftpart$(Buf$, lineFeed$)
                        If len(waste$)=0 Then  Goto GetNextPacket ' we have to read more
                        \\ waste$ has to be empty so that GetStartIdentifier2 takes the comment branch
                        If Left$(waste$,1)=";" Then waste$="": state=5 : Goto GetStartIdentifier2
                        If Left$(waste$,1)=">" Then state=1 : Goto GetStartIdentifier
                        waste$=Filter$(waste$,WhiteSpace$)
                        Call Event "DataLine", waste$
                        Buf$=Rightpart$(Buf$,lineFeed$)
                        Goto GetNextPacket
            }     
      }
      Group WithEvents K=FASTA_MACHINE()
      Document Final$, Inp$
      
      \\ In documents, "="" used for append data. Final$="append this"
      Const NewLine$=chr$(13)+chr$(10)
      Const Center=2
      \\ Event's Functions
      Function K_GetBuffer (New &a$) {
            Input "IN:", a$
            inp$=a$+NewLine$
            while right$(a$, 1)="\" {
                  Input "IN:", b$
                  inp$=b$+NewLine$
                  if b$="" then b$="n" 
                  a$+=b$
            }
            a$= replace$("\N","\n", a$)
            a$= replace$("\n",NewLine$, a$)
      }
      Function K_header (New a$) {
            iF Doc.Len(Final$)=0 then {
                  Final$=a$+": "
            } Else Final$=Newline$+a$+": "
      }
      Function K_DataLine (New a$) {
            Final$=a$
      }
      Function K_Quit (New &q) {
            q=keypress(1) 
      }
      Cls , 0
      Report Center, "FASTA Format"
      Report "Simulate input channel in packets (\n for new line). Use empty input to exit after new line, or press left mouse button and Enter to quit. Use ; to write comments. Use > to open a title"
      Cls, row  ' scroll from current row
      K.Run
      Cls
      Report Center, "Input File"
      Report Inp$
      Report Center, "Output File"
      Report Final$
}
checkit

Mathematica/Wolfram Language

Mathematica has built-in support for FASTA files and strings

ImportString[">Rosetta_Example_1
 THERECANBENOSPACE
 >Rosetta_Example_2
 THERECANBESEVERAL
 LINESBUTTHEYALLMUST
 BECONCATENATED
 ", "FASTA"]
Output:
{"THERECANBENOSPACE", "THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED"}

Nim

import strutils

let input = """>Rosetta_Example_1
    THERECANBENOSPACE
    >Rosetta_Example_2
    THERECANBESEVERAL
    LINESBUTTHEYALLMUST
    BECONCATENATED""".unindent

proc fasta*(input: string) =
    ## Prints every FASTA record found in `input` as "name: sequence".
    ## A line starting with ">" opens a new record; all other lines are
    ## stripped of surrounding whitespace and appended to the current record.
    var current = ""
    for rawLine in splitLines(input):
        if not rawLine.startsWith(">"):
            current.add rawLine.strip
            continue
        # a new header: flush the record collected so far, if there is one
        if current.len > 0: echo current
        current = rawLine[1..^1] & ": "
    echo current

fasta(input)
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Oberon

Works with A2 Oberon.

MODULE Fasta;

IMPORT Files, Streams, Strings, Commands;

PROCEDURE PrintOn*(filename: ARRAY OF CHAR; wr: Streams.Writer);
VAR
	rd: Files.Reader;
	f: Files.File;
	line: ARRAY 1024 OF CHAR;
	res: BOOLEAN;
BEGIN
	f := Files.Old(filename);
	ASSERT(f # NIL);
	NEW(rd,f,0);
	res := rd.GetString(line);
	WHILE rd.res # Streams.EOF DO
		IF line[0] = '>' THEN
			wr.Ln;
			wr.String(Strings.Substring2(1,line)^);
			wr.String(": ")
		ELSE
			wr.String(line)
		END;
		res := rd.GetString(line)
	END
END PrintOn;

PROCEDURE Do*;
VAR
	ctx: Commands.Context;
	filename: ARRAY 256 OF CHAR;
	res: BOOLEAN
BEGIN
	ctx := Commands.GetContext();
	res := ctx.arg.GetString(filename);
	PrintOn(filename,ctx.out)
END Do;

END Fasta.
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Objeck

# Prints the FASTA file named by the single command line argument.
# NOTE(review): the output format depends on how Parser->Tokenize splits the
# file content (presumably ">" becomes a token of its own) - confirm.
class Fasta {
  function : Main(args : String[]) ~ Nil {
    if(args->Size() = 1) {
      # true while the next token is the name that follows a '>' marker
      is_line := false;
      tokens := System.Utility.Parser->Tokenize(System.IO.File.FileReader->ReadFile(args[0]))<String>;
      each(i : tokens) {
        token := tokens->Get(i);
        if(token->Get(0) = '>') {
          is_line := true;
          # every record except the first starts on a new line
          if(i <> 0) {
            "\n"->Print();
          };
        } 
        else if(is_line) {
          "{$token}: "->Print();
          is_line := false;
        }
        else {
          token->Print();
        };
      };
    };
  }
}
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

OCaml

I keep it simple by sticking to the description of the FASTA format described in the task.

The program reads and processes the input one line at a time, and directly prints out the chunk of data available. The long strings are not concatenated in memory but just examined and processed as necessary: either printed out as is in the case of part of a sequence, or formatted in the case of the name (what I call the label), and managing the new lines where needed.

Works with: OCaml version 4.03+
(* This program reads from the standard input and writes to standard output.
 * Examples of use:
 *    $ ocaml fasta.ml < fasta_file.txt
 *    $ ocaml fasta.ml < fasta_file.txt > my_result.txt
 *
 * The FASTA file is assumed to have a specific format, where the first line
 * contains a label in the form of '>blablabla', i.e. with a '>' as the first
 * character.
 *)

(* Character that introduces a label (header) line. *)
let labelstart = '>'

(* [is_label s] is true when [s] starts with [labelstart].  The length check
 * comes first: [s.[0]] raises Invalid_argument on an empty line. *)
let is_label s = String.length s > 0 && s.[0] = labelstart
(* [get_label s] is [s] without its first character; [s] must not be empty. *)
let get_label s = String.sub s 1 (String.length s - 1)

let read_in channel = input_line channel |> String.trim

let print_fasta chan =
  let rec doloop currlabel line =
    if is_label line then begin
        if currlabel <> "" then print_newline ();
        let newlabel = get_label line in
        print_string (newlabel ^ ": ");
        doloop newlabel (read_in chan)
    end
    else begin
        print_string line;
        doloop currlabel (read_in chan)
    end
  in
  try
    match read_in chan with
    | line when is_label line -> doloop "" line
    | _ -> failwith "Badly formatted FASTA file?"
  with
    End_of_file -> print_newline ()


let () =
  print_fasta stdin
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Pascal

program FASTA_Format;
// FPC 3.0.2
// Filter: reads FASTA text from standard input and writes every record as
// "name: sequence" to standard output, one record per line.
// Accepts LF as well as CR/LF line ends.
var InF,
    OutF: Text;
    ch: char;
    First: Boolean=True;   // no header seen yet
    InDef: Boolean=False;  // currently inside a header line

begin
  Assign(InF,'');
  Reset(InF);
  Assign(OutF,'');
  Rewrite(OutF);
  While Not Eof(InF) do
  begin
    Read(InF,ch);
    Case Ch of
      '>': begin
            // every header except the first ends the previous record
            if Not(First) then
              WriteLn(OutF)
            else
              First:=False;
            InDef:=true;
          end;
      // A line end terminates a header (on either character, so LF-only
      // files work too) and is otherwise dropped to join the sequence lines.
      #13, #10:
          if InDef then
          begin
            InDef:=false;
            Write(OutF,': ');
          end;
      else Write(OutF,Ch);
    end;
  end;
  // terminate the last record
  if Not(First) then
    WriteLn(OutF);
  Close(OutF);
  Close(InF);
end.

FASTA_Format < test.fst

Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Perl

my $fasta_example = <<'END_FASTA_EXAMPLE';
>Rosetta_Example_1
THERECANBENOSPACE
>Rosetta_Example_2
THERECANBESEVERAL
LINESBUTTHEYALLMUST
BECONCATENATED
END_FASTA_EXAMPLE

# Read the sample through an in-memory filehandle so it is processed strictly
# line by line.  (A spaced-out "< $string >" is a glob(): it splits on any
# whitespace and expands wildcard characters found in the data.)
open my $fasta_fh, '<', \$fasta_example
	or die "Cannot read the FASTA example: $!";

my $num_newlines = 0;	# becomes 1 once sequence data has been printed
while ( my $line = <$fasta_fh> ) {
	$line =~ s/\s+\z//;	# drop the line ending (LF or CRLF) and trailing blanks
	if ( $line =~ /\A>(.*)/ ) {
		# header: close the previous record (if any), then print the name
		print "\n" x $num_newlines, $1, ': ';
	}
	else {
		# sequence chunk: appended directly to the current output line
		$num_newlines = 1;
		print $line;
	}
}
close $fasta_fh;
print "\n" if $num_newlines;	# terminate the last record
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Phix

-- Reads "fasta.txt" and prints every record as "name: sequence" on one line.
bool first = true                   -- true until the first '>' header line has been printed
integer fn = open("fasta.txt","r")
if fn=-1 then ?9/0 end if           -- open failed: force a fatal error (division by zero)
while true do
    object line = trim(gets(fn))
    -- an atom (non-string) result is treated as end of input:
    -- finish the current output line and stop
    if atom(line) then puts(1,"\n") exit end if
    if length(line) then            -- empty lines are skipped
        if line[1]=='>' then
            if not first then puts(1,"\n") end if   -- end the previous record's line
            printf(1,"%s: ",{line[2..$]})           -- header text without the leading '>'
            first = false
        elsif first then
            -- data found before any header line
            printf(1,"Error : File does not begin with '>'\n")
            exit
        elsif not find_any(" \t",line) then
            puts(1,line)            -- sequence chunk: appended to the current output line
        else
            printf(1,"\nError : Sequence contains space(s)\n")
            exit
        end if
    end if
end while
close(fn)
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

PicoLisp

# Print each record of the FASTA file F as "name: sequence" on a line of its own.
(de fasta (F)
   (in F
      (while (from ">")                       # advance to the next ">" marker, if there is one
         (prin (line T) ": ")                 # the remainder of that line is the name
         (until (or (= ">" (peek)) (eof))     # up to the next header or the end of input ...
            (prin (line T)) )                 # ... print the sequence lines back to back
         (prinl) ) ) )                        # finish the record's output line
(fasta "fasta.dat")
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

PL/M

Works with: 8080 PL/M Compiler

... under CP/M (or an emulator)

Reads the data from the file named on the command line, e.g., if the program is stored in D:FASTA.COM and the data in D:FASTAIN.TXT, the following could be used: D:FASTA D:FASTAIN.TXT.
Restarts CP/M when the program finishes.

100H: /* DISPLAY THE CONTENTS OF A FASTA FORMAT FILE                         */

   DECLARE FALSE    LITERALLY '0', TRUE LITERALLY '0FFH';
   DECLARE NL$CHAR  LITERALLY '0AH';                     /* NEWLINE: CHAR 10 */
   DECLARE CR$CHAR  LITERALLY '0DH';             /* CARRIAGE RETURN, CHAR 13 */
   DECLARE EOF$CHAR LITERALLY '26';                      /* EOF: CTRL-Z      */
   /* CP/M BDOS SYSTEM CALL, RETURNS A VALUE                                 */
   BDOS: PROCEDURE( FN, ARG )BYTE; DECLARE FN BYTE, ARG ADDRESS; GOTO 5; END;
   /* CP/M BDOS SYSTEM CALL, NO RETURN VALUE */
   BDOS$P: PROCEDURE( FN, ARG );   DECLARE FN BYTE, ARG ADDRESS; GOTO 5; END;
   EXIT:      PROCEDURE; CALL BDOS$P( 0, 0 ); END;      /* CP/M SYSTEM RESET */
   /* PRINT THE CHARACTER C                                                  */
   PR$CHAR:   PROCEDURE( C ); DECLARE C BYTE;    CALL BDOS$P( 2, C );    END;
   /* PRINT THE '$' TERMINATED STRING AT ADDRESS S                           */
   PR$STRING: PROCEDURE( S ); DECLARE S ADDRESS; CALL BDOS$P( 9, S );    END;
   /* PRINT A CARRIAGE RETURN / NEWLINE PAIR                                 */
   PR$NL:     PROCEDURE; CALL PR$STRING( .( 0DH, NL$CHAR, '$' ) );       END;
   /* THE FILE PROCEDURES BELOW RETURN A TRUE/FALSE SUCCESS INDICATION       */
   /* DERIVED FROM THE BDOS RESULT CODE                                      */
   FL$EXISTS: PROCEDURE( FCB )BYTE; /* RETURNS TRUE IF THE FILE NAMED IN THE */
      DECLARE FCB ADDRESS;          /*                 FCB EXISTS            */
      RETURN ( BDOS( 17, FCB ) < 4 );
   END FL$EXISTS ;
   FL$OPEN:   PROCEDURE( FCB )BYTE; /* OPEN THE FILE WITH THE SPECIFIED FCB  */
      DECLARE FCB ADDRESS;
      RETURN ( BDOS( 15, FCB ) < 4 );
   END FL$OPEN;
   FL$READ:   PROCEDURE( FCB )BYTE; /* READ THE NEXT RECORD FROM FCB         */
      DECLARE FCB ADDRESS;
      RETURN ( BDOS( 20, FCB ) = 0 );
   END FL$READ;
   FL$CLOSE:  PROCEDURE( FCB )BYTE; /* CLOSE THE FILE WITH THE SPECIFIED FCB */
      DECLARE FCB ADDRESS;
      RETURN ( BDOS( 16, FCB ) < 4 );
   END FL$CLOSE;

   /* I/O USES FILE CONTROL BLOCKS CONTAINING THE FILE-NAME, POSITION, ETC.  */
   /* WHEN THE PROGRAM IS RUN, THE CCP WILL FIRST PARSE THE COMMAND LINE AND */
   /* PUT THE FIRST PARAMETER IN FCB1, THE SECOND PARAMETER IN FCB2          */
   /* BUT FCB2 OVERLAYS THE END OF FCB1 AND THE DMA BUFFER OVERLAYS THE END  */
   /* OF FCB2                                                                */

   DECLARE FCB$SIZE      LITERALLY '36';  /* SIZE OF A FCB                   */
   DECLARE FCB1          LITERALLY '5CH'; /* ADDRESS OF FIRST  FCB           */
   DECLARE FCB2          LITERALLY '6CH'; /* ADDRESS OF SECOND FCB           */
   DECLARE DMA$BUFFER    LITERALLY '80H'; /* DEFAULT DMA BUFFER ADDRESS      */
   DECLARE DMA$SIZE      LITERALLY '128'; /* SIZE OF THE DMA BUFFER          */

   /* F$PTR IS THE CURRENT POSITION, F$CHAR THE BYTE AT THAT POSITION        */
   DECLARE F$PTR ADDRESS, F$CHAR BASED F$PTR BYTE;

   /* CLEAR THE PARTS OF FCB1 OVERLAYED BY FCB2                              */
   DO F$PTR = FCB1 + 12 TO FCB1 + ( FCB$SIZE - 1 );
      F$CHAR = 0;
   END;

   /* SHOW THE FASTA DATA, IF THE FILE EXISTS                                */
   IF NOT FL$EXISTS( FCB1 ) THEN DO;    /* THE FILE DOES NOT EXIST           */
      CALL PR$STRING( .'FILE NOT FOUND$' );CALL PR$NL;
      END;
   ELSE IF NOT FL$OPEN( FCB1 ) THEN DO; /* UNABLE TO OPEN THE FILE           */
      CALL PR$STRING( .'UNABLE TO OPEN THE FILE$' );CALL PR$NL;
      END;
   ELSE DO; /* FILE EXISTS AND OPENED OK - ATTEMPT TO SHOW THE DATA          */
      /* BOL        - THE NEXT CHARACTER IS THE FIRST OF A LINE              */
      /* GOT$RCD    - THE DMA BUFFER HOLDS A RECORD THAT WAS READ OK         */
      /* IS$HEADING - THE CURRENT LINE STARTED WITH '>'                      */
      DECLARE ( BOL, GOT$RCD, IS$HEADING ) BYTE, DMA$END ADDRESS;
      DMA$END    = DMA$BUFFER + ( DMA$SIZE - 1 );
      GOT$RCD    = FL$READ( FCB1 );                  /* GET THE FIRST RECORD */
      F$PTR      = DMA$BUFFER;
      BOL        = TRUE;
      IS$HEADING = FALSE;
      DO WHILE GOT$RCD;
         IF F$PTR > DMA$END THEN DO;                        /* END OF BUFFER */
            GOT$RCD = FL$READ( FCB1 );                /* GET THE NEXT RECORD */
            /* POSITION ONE BEFORE THE BUFFER: THE INCREMENT AT THE END OF   */
            /* THE LOOP THEN LANDS ON THE FIRST BYTE OF THE NEW RECORD       */
            F$PTR   = DMA$BUFFER - 1;
            END;
         ELSE IF F$CHAR = NL$CHAR THEN DO;                    /* END OF LINE */
            IF IS$HEADING THEN DO;         /* SEPARATE THE NAME FROM THE DATA */
                CALL PR$STRING( .': $' );
                IS$HEADING = FALSE;
            END;
            BOL = TRUE;
            END;
         ELSE IF F$CHAR = CR$CHAR THEN DO; END;    /* IGNORE CARRIAGE RETURN */
         ELSE IF F$CHAR = EOF$CHAR THEN GOT$RCD  = FALSE;     /* END OF FILE */
         ELSE DO;                                  /* HAVE ANOTHER CHARACTER */
             IF NOT BOL THEN CALL PR$CHAR( F$CHAR );  /* NOT FIRST CHARACTER */
             ELSE DO;          /* FIRST CHARACTER - CHECK FOR A HEADING LINE */
                 BOL = FALSE;
                 /* A HEADING STARTS A NEW OUTPUT LINE; THE '>' IS NOT SHOWN  */
                 IF IS$HEADING := F$CHAR = '>' THEN CALL PR$NL;
                 ELSE                               CALL PR$CHAR( F$CHAR );
             END;
         END;
         F$PTR = F$PTR + 1;
      END;
      /* CLOSE THE FILE                                                      */
      IF NOT FL$CLOSE( FCB1 ) THEN DO;
         CALL PR$STRING( .'UNABLE TO CLOSE THE FILE$' ); CALL PR$NL;
      END;
   END;

   CALL EXIT;

EOF
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

PowerShell

When working with a real file, the content of the $file variable would be: Get-Content -Path .\FASTA_file.txt -ReadCount 1000. The -ReadCount parameter value for large files is unknown, yet sure to be a value between 1,000 and 10,000 depending upon the length of file and length of the records in the file. Experimentation is the only way to know the optimum value.

Works with: PowerShell version 4.0+
$file = @'
>Rosetta_Example_1
THERECANBENOSPACE
>Rosetta_Example_2
THERECANBESEVERAL
LINESBUTTHEYALLMUST
BECONCATENATED
'@

# Flatten the text (every line feed becomes "~"), then cut it into one chunk
# per ">" header; empty chunks are discarded by RemoveEmptyEntries.
$chunks = $file.Replace("`n","~").Split(">")
$lines  = $chunks.ForEach({
    $_.TrimEnd("~").Split("`n",2,[StringSplitOptions]::RemoveEmptyEntries)
})

# One NoteProperty per record: the name is the property, the sequence its value.
$output = New-Object -TypeName PSObject

foreach ($record in $lines)
{
    # The first "~" separates the name from the sequence; the remaining "~"
    # marks are former line breaks inside the sequence and are removed.
    $fields = $record.Split("~",2).ForEach({$_.Replace("~","")})
    $name, $value = $fields

    $output | Add-Member -MemberType NoteProperty -Name $name -Value $value
}

$output | Format-List
Output:
Rosetta_Example_1 : THERECANBENOSPACE
Rosetta_Example_2 : THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Version 3.0 Or Less

$file = @'
>Rosetta_Example_1
THERECANBENOSPACE
>Rosetta_Example_2
THERECANBESEVERAL
LINESBUTTHEYALLMUST
BECONCATENATED
'@

# Flatten the text (every line feed becomes "~"), then cut it into one chunk
# per ">" header; empty chunks are discarded by RemoveEmptyEntries.
$chunks = $file.Replace("`n","~").Split(">")
$lines  = $chunks | ForEach-Object {
    $_.TrimEnd("~").Split("`n",2,[StringSplitOptions]::RemoveEmptyEntries)
}

# One NoteProperty per record: the name is the property, the sequence its value.
$output = New-Object -TypeName PSObject

foreach ($record in $lines)
{
    # The first "~" separates the name from the sequence; the remaining "~"
    # marks are former line breaks inside the sequence and are removed.
    $fields = $record.Split("~",2) | ForEach-Object {$_.Replace("~","")}
    $name, $value = $fields

    $output | Add-Member -MemberType NoteProperty -Name $name -Value $value
}

$output | Format-List
Output:
Rosetta_Example_1 : THERECANBENOSPACE
Rosetta_Example_2 : THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

PureBasic

; Reads a FASTA file character by character and prints every record as
; "name: sequence" on a console line of its own.
EnableExplicit
; Hdl_File : handle of the input file
; Frm_File : string format reported by ReadStringFormat() for that file
; c        : character just read
; header   : #True while the characters of a '>' line are being read
Define Hdl_File.i, 
       Frm_File.i,
       c.c,         
       header.b

Hdl_File=ReadFile(#PB_Any,"c:\code_pb\rosettacode\data\FASTA_TEST.txt")
If Not IsFile(Hdl_File) : End -1 : EndIf    ; the file could not be opened
Frm_File=ReadStringFormat(Hdl_File)

If OpenConsole("FASTA format")
  While Not Eof(Hdl_File)
    c=ReadCharacter(Hdl_File,Frm_File)  
    Select c
      Case '>'
        ; start of a record: begin a new output line (also before the first record)
        header=#True
        PrintN("")
      Case #LF, #CR
        ; a line end terminates a header; inside a sequence it is dropped
        If header
          Print(": ")
          header=#False        
        EndIf      
      Default      
        Print(Chr(c))
    EndSelect      
  Wend
  CloseFile(Hdl_File)
  Input()   ; wait for Enter before the console closes
EndIf
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Python

I use a string to mimic an input file. If it were a real input file, it would be read line by line in the same way: a generator function yields key, value pairs as soon as each record has been read, keeping the minimum in memory.

import io

FASTA='''\
>Rosetta_Example_1
THERECANBENOSPACE
>Rosetta_Example_2
THERECANBESEVERAL
LINESBUTTHEYALLMUST
BECONCATENATED'''

infile = io.StringIO(FASTA)

def fasta_parse(infile):
    """Yield (name, sequence) pairs from an iterable of FASTA lines.

    A line starting with '>' opens a record; its name is the first
    whitespace-delimited word after the '>' (the empty string when the
    header carries no text).  The following lines, stripped of trailing
    whitespace, are concatenated to form the sequence.  Lines that appear
    before the first header are ignored.  Each pair is yielded as soon as
    the record is complete, so only one record is held in memory.
    """
    key = None      # None until the first header has been seen
    chunks = []     # pieces of the current sequence, joined once per record
    for line in infile:
        if line.startswith('>'):
            if key is not None:
                yield key, ''.join(chunks)
            words = line[1:].split()
            key = words[0] if words else ''
            chunks = []
        elif key is not None:
            chunks.append(line.rstrip())
    if key is not None:
        yield key, ''.join(chunks)

print('\n'.join('%s: %s' % keyval for keyval in fasta_parse(infile)))
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

R

library("seqinr")

# Write the sample records to a scratch file so that seqinr can parse them.
fname <- "rosettacode.fasta"
data <- c(
  ">Rosetta_Example_1",
  "THERECANBENOSPACE",
  ">Rosetta_Example_2",
  "THERECANBESEVERAL",
  "LINESBUTTHEYALLMUST",
  "BECONCATENATED"
)
f <- file(fname, "w+")
writeLines(data, f)
close(f)

# Each list element is one sequence string; its 'Annot' attribute is printed as the label.
fasta <- read.fasta(file = fname, as.string = TRUE, seqtype = "AA")
for (idx in seq_along(fasta)) {
  entry <- fasta[[idx]]
  cat(attr(entry, 'Annot'), ":", entry, "\n")
}
Output:
>Rosetta_Example_1 : THERECANBENOSPACE 
>Rosetta_Example_2 : THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED 

Racket

#lang racket
;; Reads FASTA text from the current input port and writes the records as
;; "name: sequence" lines to the current output port.
;; m is #t on the first pass; afterwards it is the result of the newline
;; search at the bottom of the loop, and a false result ends the loop.
(let loop ([m #t])
  (when m
    ;; A ">" at the current input position starts a header line.
    (when (regexp-try-match #rx"^>" (current-input-port))
      (unless (eq? #t m) (newline))   ; not the first pass: end the previous record's line
      (printf "~a: " (read-line)))    ; the rest of the header line is the name
    ;; Search for the next newline.  The last argument is an output port;
    ;; NOTE(review): per the regexp-match documentation it receives the input
    ;; skipped before the match, which is how the sequence text gets echoed
    ;; without its line break -- confirm the no-match (end of input) case.
    (loop (regexp-match #rx"\n" (current-input-port) 0 #f
                        (current-output-port)))))
(newline)

Raku

(formerly Perl 6)

# Grammar for the FASTA subset used by the task:
#   TOP      - one or more entries
#   entry    - a '>' followed by a title and a sequence
#   title    - a run of <alnum> characters
#   sequence - runs of <alnum> characters separated by newlines; the action
#              block joins the captured runs and stores the result with 'make'
grammar FASTA {

    rule TOP    { <entry>+ }
    rule entry  { \> <title> <sequence> }
    token title { <.alnum>+ }
    token sequence { ( <.alnum>+ )+ % \n { make $0.join } }

}

# Parse the sample text; the match object is left in $/.
FASTA.parse: q:to /§/;
>Rosetta_Example_1
THERECANBENOSPACE
>Rosetta_Example_2
THERECANBESEVERAL
LINESBUTTHEYALLMUST
BECONCATENATED
§

# Print every entry: its title, then the joined sequence stored by 'make'.
for $/<entry>[] {
    say ~.<title>, " : ", .<sequence>.made;
}
Output:
Rosetta_Example_1 : THERECANBENOSPACE
Rosetta_Example_2 : THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

REXX

Neither REXX version reads the entire file into memory at one time;   lines are processed as they are read (one line at a time).

version 1

This REXX version correctly processes the examples shown.

/*REXX program reads a (bio-informational) FASTA file and displays the contents. */
parse arg ifid .                       /* ifid:  name of the input file          */
if ifid=='' then ifid='FASTA.IN'       /* none given:  fall back to the default  */
name=''                                /* name of the record being collected     */
d=''                                   /* sequence data collected for that name  */
do while lines(ifid)\==0               /* as long as the file has lines left     */
  x=strip(linein(ifid),'T')            /* next line, without trailing blanks     */
  if left(x,1)\=='>' then do           /* ordinary data line:                    */
    d=d||x                             /*   append it to the current sequence    */
    iterate
    end
  call out                             /* header line: show the finished record  */
  name=substr(x,2)                     /*   the new name is the text after '>'   */
  d=''                                 /*   and its sequence starts out empty    */
  end
call out                               /* show the last record of the file       */
exit

out:                                   /* show one record, unless it has no data */
if d=='' then return
say name':' d
return
output   when using the default input filename:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

version 2

This REXX version handles   (see the talk page):

  •   blank lines
  •   sequences that end in an asterisk   [*]
  •   sequences that contain blanks, tabs, and other whitespace
  •   sequence names that are identified with a semicolon   [;]
/*REXX program reads a (bio-informational) FASTA file and displays the contents. */
parse arg iFID .                          /*iFID:  name of the input file.       */
if iFID=='' then iFID='FASTA2.IN'         /*none given:  fall back to the default*/
name=''                                   /*name of the record being collected.  */
data=''                                   /*sequence data collected for the name.*/
do while lines(iFID)\==0                  /*as long as the file has lines left.  */
  x=strip(linein(iFID),'T')               /*next line, without trailing blanks.  */
  if x=='' then iterate                   /*blank lines are ignored.             */
  lead=left(x,1)                          /*the first character decides the kind.*/
  if lead==';' then do                    /*a ";" line is echoed as it is;       */
    if name=='' then name=substr(x,2)     /*  it names the record if none is set.*/
    say x
    iterate
    end
  if lead=='>' then do                    /*a ">" line starts a new record:      */
    if data\=='' then say name':' data    /*  show the record collected so far,  */
    name=substr(x,2)                      /*  take the new name,                 */
    data=''                               /*  and start with empty data.         */
    iterate
    end
  data=space(data||translate(x, ,'*'),0)  /*append; drop asterisks and all blanks*/
  end
if data\=='' then say name':'  data       /*show the last record of the file.    */
input:   The   FASTA2.IN   file is shown below:
;LCBO - Prolactin precursor - Bovine
; a sample sequence in FASTA format
MDSKGSSQKGSRLLLLLVVSNLLLCQGVVSTPVCPNGPGNCQVSLRDLFDRAVMVSHYIHDLSS
EMFNEFDKRYAQGKGFITMALNSCHTSSLPTPEDKEQAQQTHHEVLMSLILGLLRSWNDPLYHL
VTEVRGMKGAPDAILSRAIEIEEENKRLLEGMEMIFGQVIPGAKETEPYPVWSGLPSLQTKDED
ARYSAFYNLLHCLRRDSSKIDTYLKLLNCRIIYNNNC*

>MCHU - Calmodulin - Human, rabbit, bovine, rat, and chicken
ADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMINEVDADGNGTID
FPEFLTMMARKMKDTDSEEEIREAFRVFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIREA
DIDGDGQVNYEEFVQMMTAK*

>gi|5524211|gb|AAD44166.1| cytochrome b [Elephas maximus maximus]
LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
IENY
output   when using the default input filename:
;LCBO - Prolactin precursor - Bovine
; a sample sequence in FASTA format
LCBO - Prolactin precursor - Bovine: MDSKGSSQKGSRLLLLLVVSNLLLCQGVVSTPVCPNGPGNCQVSLRDLFDRAVMVSHYIHDLSSEMFNEFDKRYAQGKGFITMALNSCHTSSLPTPEDKEQAQQTHHEVLMSLILGLLRSWNDPLYHLVTEVRGMKGAPDAILSRAIEIEEENKRLLEGMEMIFGQVIPGAKETEPYPVWSGLPSLQTKDEDARYSAFYNLLHCLRRDSSKIDTYLKLLNCRIIYNNNC
MCHU - Calmodulin - Human, rabbit, bovine, rat, and chicken: ADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMINEVDADGNGTIDFPEFLTMMARKMKDTDSEEEIREAFRVFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIREADIDGDGQVNYEEFVQMMTAK
gi|5524211|gb|AAD44166.1| cytochrome b [Elephas maximus maximus]: LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLVEWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLGLLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVILGLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGXIENY

Ring

# Project : FASTA format
# Prints each record of the sample text as ">name: sequence" on one line.

a = ">Rosetta_Example_1
THERECANBENOSPACE
>Rosetta_Example_2
THERECANBESEVERAL
LINESBUTTHEYALLMUST
BECONCATENATED"
 
i = 1
while i <= len(a)
      if substr(a,i,17) = ">Rosetta_Example_"  
         # header found: start a new output line and print the 18-character
         # name followed by ": "; the sequence continues on the same line
         see nl 
         see substr(a,i,18) + ": "
         i = i + 17
      else
         # sequence character: printed unless it is a low control code
         # such as the line break between the input lines
         if ascii(substr(a,i,1)) > 20 
            see a[i] 
         ok
      ok
      i = i + 1
end

Output:

>Rosetta_Example_1: THERECANBENOSPACE
>Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Ruby

# Convert FASTA text into an array of "name: sequence" strings.
#
# A line starting with '>' opens a record; the following lines are appended
# to it without separators.  Line endings (LF or CR/LF) are removed.
# Always returns an Array; it is empty when the input holds no text.
def fasta_format(strings)
  out, text = [], ""
  strings.each_line do |raw|
    line = raw.chomp
    if line.start_with?('>')
      out << text unless text.empty?
      text = line[1..-1] + ": "
    else
      text += line
    end
  end
  out << text unless text.empty?
  out
end

data = <<'EOS'
>Rosetta_Example_1
THERECANBENOSPACE
>Rosetta_Example_2
THERECANBESEVERAL
LINESBUTTHEYALLMUST
BECONCATENATED
EOS

puts fasta_format(data)
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Run BASIC

' Prints each record of the sample text as ">name: sequence" on one line.
a$ = ">Rosetta_Example_1
THERECANBENOSPACE
>Rosetta_Example_2
THERECANBESEVERAL
LINESBUTTHEYALLMUST
BECONCATENATED"

' Walk through the text one character position at a time.
i = 1
while i <= len(a$)
  ' A header is recognised by its fixed 17-character prefix; the 18 characters
  ' printed are that prefix plus the single character that follows it.
  if mid$(a$,i,17) = ">Rosetta_Example_" then 
    print 
    print mid$(a$,i,18);": ";
    i = i + 17
   else
    ' sequence character: printed unless its code is 20 or lower (e.g. a line break)
    if asc(mid$(a$,i,1)) > 20 then print mid$(a$,i,1);
  end if
  i = i + 1
wend
Output:
>Rosetta_Example_1: THERECANBENOSPACE
>Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Rust

This example is implemented using an iterator to reduce memory requirements and encourage code reuse.

use std::env;
use std::io::{BufReader, Lines};
use std::io::prelude::*;
use std::fs::File;

/// Prints every record of the FASTA file named by the first command-line
/// argument as `name: sequence`, one record per line.
///
/// Exits with status 2 when no file name is given and with status 1 when
/// the file cannot be opened, instead of panicking.
fn main() {
    let args: Vec<String> = env::args().collect();
    let path = match args.get(1) {
        Some(p) => p,
        None => {
            let prog = args.first().map(String::as_str).unwrap_or("fasta");
            eprintln!("Usage: {} <fasta file>", prog);
            std::process::exit(2);
        }
    };
    let f = match File::open(path) {
        Ok(file) => file,
        Err(err) => {
            eprintln!("Cannot open {}: {}", path, err);
            std::process::exit(1);
        }
    };
    for line in FastaIter::new(f) {
        println!("{}", line);
    }
}

/// Iterator that turns FASTA input into one `"name: sequence"` string per record.
struct FastaIter<T> {
    /// Line reader over the underlying input.
    buffer_lines: Lines<BufReader<T>>,
    /// Name of the record being collected; `None` before the first header
    /// and after the last record has been returned.
    current_name: Option<String>,
    /// Sequence text collected so far for `current_name`.
    current_sequence: String
}

impl<T: Read> FastaIter<T> {
    /// Wraps `file` in a buffered line reader; nothing is read until `next` is called.
    fn new(file: T) -> FastaIter<T> {
        let buffer_lines = BufReader::new(file).lines();
        FastaIter {
            buffer_lines,
            current_name: None,
            current_sequence: String::new(),
        }
    }
}

impl<T: Read> Iterator for FastaIter<T> {
    type Item = String;

    /// Reads lines until the current record is complete and returns it.
    ///
    /// A record ends when the next header line (starting with `>`) or the
    /// end of the input is reached.  Panics if reading a line fails.
    fn next(&mut self) -> Option<String> {
        for io_result in self.buffer_lines.by_ref() {
            let text = io_result.unwrap();
            if !text.starts_with(">") {
                // sequence line: collected without surrounding whitespace
                self.current_sequence.push_str(text.trim());
                continue;
            }
            // Header line: it becomes the current name; a previously open
            // record (if any) is complete and is handed out.
            let previous = self.current_name.replace(String::from(&text[1..]));
            let finished = match previous {
                Some(name) => Some(format!("{}: {}", name, self.current_sequence)),
                None => None,
            };
            self.current_sequence.clear();
            if finished.is_some() {
                return finished;
            }
        }
        // End of input: hand out the record that is still open, if any.
        match self.current_name.take() {
            Some(name) => {
                let record = format!("{}: {}", name, self.current_sequence);
                self.current_sequence.clear();
                self.current_sequence.shrink_to_fit();
                Some(record)
            }
            None => None,
        }
    }
}
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Scala

import java.io.File
import java.util.Scanner

/** Prints every record of "test.fasta" as "name: sequence" on one line.
  *
  * Lines are trimmed and blank lines are skipped.  The scanner is closed
  * even when reading fails, and the last record is terminated by a newline.
  */
object ReadFastaFile extends App {
  val sc = new Scanner(new File("test.fasta"))
  var first = true // true until the first header has been printed

  try {
    while (sc.hasNextLine) {
      val line = sc.nextLine.trim
      if (line.nonEmpty) {
        if (line.charAt(0) == '>') {
          // header: end the previous record's line, then print the name
          if (first) first = false
          else println()
          printf("%s: ", line.substring(1))
        }
        else print(line) // sequence chunk: appended to the current line
      }
    }
  } finally sc.close()

  println()
}

Scheme

(import (scheme base)
        (scheme file)
        (scheme write))

;; Prints every record of "fasta.txt" as "name: sequence" on one line.
;; Empty lines are skipped; a newline is written before every name except
;; the first one, and once more at the end of the input.
(with-input-from-file ; reads text from named file, one line at a time
  "fasta.txt"
  (lambda ()
    (let loop ((first-name? #t) ; no name has been displayed yet
               (line (read-line)))
      (cond ((eof-object? line) (newline))
            ((zero? (string-length line)) ; empty line: nothing to display
             (loop first-name? (read-line)))
            ((char=? #\> (string-ref line 0)) ; found a name
             (unless first-name? ; no newline before the first name
               (newline))
             (display (string-copy line 1)) (display ": ")
             (loop #f (read-line)))
            (else ; display the string directly
              (display line)
              (loop first-name? (read-line)))))))
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Seed7

$ include "seed7_05.s7i";

# Prints every record of "fasta_format.in" as "name: sequence" on one line.
# When the file cannot be opened only an empty line is written.
const proc: main is func
  local
    var file: fastaFile is STD_NULL;
    var string: line is "";
    var boolean: first is TRUE;    # TRUE until the first header has been written
  begin
    fastaFile := open("fasta_format.in", "r");
    if fastaFile <> STD_NULL then
      while hasNext(fastaFile) do
        line := getln(fastaFile);
        if startsWith(line, ">") then
          # header line: finish the previous record, then write the name
          if first then
            first := FALSE;
          else
            writeln;
          end if;
          write(line[2 ..] <& ": ");
        else
          # sequence line: appended to the current output line
          write(line);
        end if;
      end while;
      close(fastaFile);
    end if;
    writeln;
  end func;
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Sidef

Translation of: Ruby
# Convert FASTA text into an array of "name: sequence" strings.
# A line starting with '>' opens a new entry; every other line is appended
# to the entry that is currently being built.
func fasta_format(strings) {
    var out = []
    var text = ''
    for line in (strings.lines) {
        if (line.begins_with('>')) {
            text.len && (out << text)       # store the finished entry, if any
            text = line.substr(1)+': '      # name without the leading '>'
        }
        else {
            text += line
        }
    }
    text.len && (out << text)               # store the last entry
    return out
}

# The sample data follows the __DATA__ marker below.
fasta_format(DATA.slurp).each { .say }

__DATA__
>Rosetta_Example_1
THERECANBENOSPACE
>Rosetta_Example_2
THERECANBESEVERAL
LINESBUTTHEYALLMUST
BECONCATENATED
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Smalltalk

Works with Pharo Smalltalk

"Show every record of the FASTA file aFilename (in the home directory) on the
 Transcript as 'name: sequence'. A header starts a new Transcript line; all
 other lines are appended to the current one."
FileLocator home / aFilename readStreamDo: [ :stream |
		[ stream atEnd ] whileFalse: [
			| line isHeader |
			line := stream nextLine.
			isHeader := line beginsWith: '>'.
			isHeader
				ifFalse: [ Transcript show: line ]
				ifTrue: [
					Transcript
						cr;
						show: (line copyFrom: 2 to: line size);
						show: ': ' ] ] ]
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Tcl

# fastaReader --
#	Print every record of the FASTA file $filename on standard output as
#	"name: sequence", one record per line.
proc fastaReader {filename} {
    set chan [open $filename]
    set prefix ""
    while {[gets $chan text] >= 0} {
	if {[string index $text 0] ne ">"} {
	    # sequence line: appended to the current output line
	    puts -nonewline $text
	    continue
	}
	# header line: end the previous record (if any) and print the name
	puts -nonewline "$prefix[string range $text 1 end]: "
	set prefix "\n"
    }
    puts ""
    close $chan
}

fastaReader ./rosettacode.fas
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

TMG

Unix TMG:

/* FASTA formatter.
   NOTE(review): the notes below are a reading of the rules that has not been
   checked against a TMG build - confirm before relying on them.
   prog/loop - parse one line after another; when no further line parses,
               a final rule is parsed whose only action is the output {*}
   line      - a name line, an empty line, or a line of sequence data
   name      - a '>' followed by the rest of the line; f is consulted before
               the output and then set to 0, so the first name line is
               handled differently from the later ones
   seqns     - the text of one line, copied to the output
   none/nonl/spaces - character classes; f is a variable that starts at 1 */
prog:   ignore(spaces)
loop:   parse(line)\loop parse(( = {*} ));
line:   ( name | * = {} | seqns );
name:   <>> ignore(none) smark string(nonl) scopy *
        ( [f>0?] = {} | = {*} ) [f=0]
        = { 1 2 <: > };
seqns:  smark string(nonl) scopy * [f=0];

none:   <<>>;
nonl:   !<<
>>;
spaces:	<< 	>>;

f:      1;

uBasic/4tH

' Prints every record of the FASTA file named on the command line as
' "name: sequence" on a line of its own.
If Cmd (0) < 2 Then Print "Usage: fasta <fasta file>" : End
If Set(a, Open (Cmd(2), "r")) < 0 Then Print "Cannot open \q";Cmd(2);"\q" : End

Do While Read (a)                      ' while there are lines to process
  t = Tok (0)                          ' get a line

  If Peek(t, 0) = Ord(">") Then        ' if it's a marker
    Print Show (Chop(t, 1)); ": "; Show (FUNC(_Payload(a)))
    Continue                           ' get the payload and print it
  EndIf

  Print "Out of sequence" : Break      ' this should never happen
Loop

Close a                                ' close the file
End                                    ' and end the program

' _Payload: collect the lines that follow a marker, up to (but not including)
' the next marker or the end of the file, and return them as one string.
' The parameter a@ is the open file.
_Payload                               ' get the payload
  Param (1)
  Local (4)

  b@ := ""                             ' start with an empty string

  Do
    c@ = Mark(a@)                      ' mark its position
  While Read (a@)                      ' now read a line
    d@ = Tok (0)                       ' get the line
                                       ' marker? reset position and exit
    If Peek (d@, 0) = Ord(">") Then e@ = Head(a@, c@) : Break
    b@ = Join (b@, d@)                 ' if not add the line to current string
  Loop

Return (b@)                            ' return the string
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

0 OK, 0:431 

V (Vlang)

const data = (
">Rosetta_Example_1
THERECANBENOSPACE
>Rosetta_Example_2
THERECANBESEVERAL
LINESBUTTHEYALLMUST
BECONCATENATED"
)

// Prints every record of `data` as ">name: sequence" on one line.
fn main() {
	mut i := 0
	// stop at the last valid index of the text
	for i < data.len {
		if data.substr_ni(i, i + 17) == ">Rosetta_Example_" {
			// header: start a new output line and print the 18-character name
			print("\n" + data.substr_ni(i, i + 18) + ": ")
			i = i + 17
		}
		else {
			// sequence character: printed only when it sorts above a blank,
			// which leaves out the line breaks between the input lines
			if data.substr_ni(i, i + 1) > "\x20" {print(data[i].ascii_str())}
		}
		i++
	}
}
Output:
>Rosetta_Example_1: THERECANBENOSPACE
>Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

Wren

Translation of: Kotlin

More or less.

import "io" for File

// True when the string holds neither a blank nor a tab.
var checkNoSpaces = Fn.new { |s| !s.contains(" ") && !s.contains("\t") }

// True until the first header line has been written.
var first = true

// Handles one input line (without its line ending): a header starts a new
// output line "name: ", a sequence line is appended to the current output
// line.  Empty lines are ignored.  Aborts the fiber on data before the
// first header and on sequence lines containing blanks or tabs.
var process = Fn.new { |line|
    if (line == "") return
    if (line[0] == ">") {
        if (!first) System.print()
        System.write("%(line[1..-1]): ")
        if (first) first = false
    } else if (first) {
        Fiber.abort("File does not begin with '>'.")
    } else if (checkNoSpaces.call(line)) {
        System.write(line)
    } else {
        Fiber.abort("Sequence contains space(s).")
    }
}

var fileName = "input.fasta"
File.open(fileName) { |file|
    var offset = 0
    var line = ""
    while(true) {
        // read the file one byte at a time, assembling lines
        var b = file.readBytes(1, offset)
        offset = offset + 1
        if (b == "\n") {
            process.call(line)
            line = "" // reset line variable
        } else if (b == "\r") { // Windows
            // wait for following "\n"
        } else if (b == "") { // end of stream
            // a last line without a line ending still has to be processed
            if (line != "") process.call(line)
            System.print()
            return
        } else {
            line = line + b
        }
    }
}
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

XPL0

proc Echo;      \Echo line of characters from file to screen
int Ch;
def LF=$0A, EOF=$1A;
[loop   [Ch:= ChIn(3);          \next character from input device 3
        case Ch of
          EOF:  exit;           \end-of-file character: leave via 'exit'
          LF:   quit            \line feed: leave the loop; it is not echoed
        other ChOut(0, Ch);
        ];
];

\Main program: show each record of fasta.txt as "name: sequence" on one line.
\The main loop has no exit of its own; the run ends in Echo on the EOF character.
int Ch;
[FSet(FOpen("fasta.txt", 0), ^i);       \open the file and set it up for input
loop    [Ch:= ChIn(3);                  \first character of the next line
        if Ch = ^> then
                [CrLf(0);               \header: start a new output line,
                Echo;                   \ echo the name,
                Text(0, ": ");          \ and separate it from the sequence
                ]
        else    ChOut(0, Ch);           \sequence line: show its first character
        Echo;                           \echo the remainder of a line
        ];
]
Output:

Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED

zkl

// fasta(data): returns an iterator over data that produces one
// "name: sequence" string per FASTA record.
fcn fasta(data){ // a lazy cruise through a FASTA file
   fcn(w){      // one string at a time, -->False garbage at front of file
      line:=w.next().strip();   // next line from the walker, white space stripped
      // header line: start the result with "name: " and append the following lines
      if(line[0]==">") w.pump(line[1,*]+": ",'wrap(l){
         // next header reached: push it back for the next call and stop this record
         if(l[0]==">") { w.push(l); Void.Stop } else l.strip()
      })
   }.fp(data.walker()) : Utils.Helpers.wap(_); // bind the walker, wrap the function as an iterator
}
  • This assumes that white space at front or end of string is extraneous (excepting ">" lines).
  • Lazy, works for objects that support iterating over lines (ie most).
  • The fasta function returns an iterator that wraps a function taking an iterator. Uh, yeah. An initial iterator (Walker) is used to get lines, hold state and push a line back when it turns out to be the start of the next record. The function consumes one record (using the iterator). The wrapping iterator (wap) traps the exception raised when the function runs off the end of the data and provides the API for foreach (etc).

FASTA file:

foreach l in (fasta(File("fasta.txt"))) { println(l) }

FASTA data blob:

data:=Data(0,String,
   ">Rosetta_Example_1\nTHERECANBENOSPACE\n"
   ">Rosetta_Example_2\nTHERECANBESEVERAL\nLINESBUTTHEYALLMUST\n"
     "BECONCATENATED");
foreach l in (fasta(data)) { println(l) }
Output:
Rosetta_Example_1: THERECANBENOSPACE
Rosetta_Example_2: THERECANBESEVERALLINESBUTTHEYALLMUSTBECONCATENATED