CSV data manipulation: Difference between revisions

Content added Content deleted

Inline

Revision as of 22:30, 9 July 2013

CSV spreadsheet files are portable ways of storing tabular data. The CSV format is either flexible or ill defined, depending on a point of view, allowing for delimiters besides comma.

The task here is to read a CSV file, change some values and save the changes back to a file. For this task we will use the following CSV file:

C1,C2,C3,C4,C5
1,5,9,13,17
2,6,10,14,18
3,7,11,15,19
4,8,12,16,20

C++

<lang cpp>#include <map>

include <vector>
include <iostream>
include <fstream>
include <utility>
include <functional>
include <string>
include <sstream>
include <algorithm>
include <cctype>

class CSV { public:

   CSV(void) : m_nCols( 0 ), m_nRows( 0 )
   {}

   bool open( const char* filename, char delim = ',' )
   {
       std::ifstream file( filename );
   
       clear();
       if ( file.is_open() )
       {
           open( file, delim );
           return true;
       }

       return false;
   }

   void open( std::istream& istream, char delim = ',' )
   {
       std::string         line;

       clear();
       while ( std::getline( istream, line ) )
       {
           unsigned int nCol = 0;
           std::istringstream    lineStream(line);
           std::string           cell;

           while( std::getline( lineStream, cell, delim ) )
           {
               m_oData[std::make_pair( nCol, m_nRows )] = trim( cell );
               nCol++;
           }
           m_nCols = std::max( m_nCols, nCol );
           m_nRows++;
       }
   }

   bool save( const char* pFile, char delim = ',' )
   {
       std::ofstream ofile( pFile );
       if ( ofile.is_open() )
       {
           save( ofile );
           return true;
       }
       return false;
   }

   void save( std::ostream& ostream, char delim = ',' )
   {
       for ( unsigned int nRow = 0; nRow < m_nRows; nRow++ )
       {
           for ( unsigned int nCol = 0; nCol < m_nCols; nCol++ )
           {
               ostream << trim( m_oData[std::make_pair( nCol, nRow )] );
               if ( (nCol+1) < m_nCols )
               {
                   ostream << delim;
               }
               else
               {
                   ostream << std::endl;
               }
           }
       }
   }

   void clear()
   {
       m_oData.clear();
       m_nRows = m_nCols = 0;
   }

   std::string& operator()( unsigned int nCol, unsigned int nRow )
   {
       m_nCols = std::max( m_nCols, nCol+1 );
       m_nRows = std::max( m_nRows, nRow+1 );
       return m_oData[std::make_pair(nCol, nRow)];
   }

   inline unsigned int GetRows() { return m_nRows; }
   inline unsigned int GetCols() { return m_nCols; }

private:

   // trim string for empty spaces in begining and at the end
   inline std::string &trim(std::string &s) 
   {
       
       s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<int, int>(std::isspace))));
       s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<int, int>(std::isspace))).base(), s.end());
       return s;
   }

private:

   std::map<std::pair<unsigned int, unsigned int>, std::string> m_oData;

   unsigned int    m_nCols;
   unsigned int    m_nRows;

};

int main() {

   CSV oCSV;

   oCSV.open( "test_in.csv" );
   oCSV( 0, 0 ) = "Column0";
   oCSV( 1, 1 ) = "100";
   oCSV( 2, 2 ) = "200";
   oCSV( 3, 3 ) = "300";
   oCSV( 4, 4 ) = "400";
   oCSV.save( "test_out.csv" );
   return 0;

}</lang>

Output (in test_out.csv):

Column0,C2,C3,C4,C5
1,100,9,13,17
2,6,200,14,18
3,7,11,300,19
4,8,12,16,400

C#

<lang csharp>using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.IO;

namespace CSV {

   class CSV
   {
       private Dictionary<Tuple<int, int>, string> _data;
       private int _rows;
       private int _cols;

       public int Rows { get { return _rows; } }
       public int Cols { get { return _cols; } }

       public CSV()
       {
           Clear();
       }

       public void Clear()
       {
           _rows = 0;
           _cols = 0;
           _data = new Dictionary<Tuple<int, int>, string>();
       }

       public void Open(StreamReader stream, char delim = ',')
       {
           string line;
           int col = 0;
           int row = 0;

           Clear();

           while ((line = stream.ReadLine()) != null)
           {
               if (line.Length > 0)
               {
                   string[] values = line.Split(delim);
                   col = 0;
                   foreach (var value in values)
                   {
                       this[col,row] = value;
                       col++;
                   }
                   row++;
               }
           }
           stream.Close();
       }

       public void Save(StreamWriter stream, char delim = ',')
       {
           for (int row = 0; row < _rows; row++)
           {
               for (int col = 0; col < _cols; col++)
               {
                   stream.Write(this[col, row]);
                   if (col < _cols - 1)
                   {
                       stream.Write(delim);
                   }
               }
               stream.WriteLine();
           }
           stream.Flush();
           stream.Close();
       }

       public string this[int col, int row]
       {
           get
           {
               try
               {
                   return _data[new Tuple<int, int>(col, row)];
               }
               catch
               {
                   return "";
               }
           }

           set
           {
               _data[new Tuple<int, int>(col, row)] = value.ToString().Trim();
               _rows = Math.Max(_rows, row + 1);
               _cols = Math.Max(_cols, col + 1);
           }
       }

       static void Main(string[] args)
       {
           CSV csv = new CSV();

           csv.Open(new StreamReader(@"test_in.csv"));
           csv[0, 0] = "Column0";
           csv[1, 1] = "100";
           csv[2, 2] = "200";
           csv[3, 3] = "300";
           csv[4, 4] = "400";
           csv.Save(new StreamWriter(@"test_out.csv"));
       }
   }

}</lang>

Output (in test_out.csv):

Column0,C2,C3,C4,C5
1,100,9,13,17
2,6,200,14,18
3,7,11,300,19
4,8,12,16,400

Icon and Unicon

This version only works in Unicon, but can be easily adapted to work in Icon.

<lang unicon>import Utils # To get CSV procedures

procedure main(A)

   f := open(A[1]) | &input
   i := 1
   write(!f)   # header line(?)
   every csv := parseCSV(!f) do {
       csv[i+:=1] *:= 100
       write(encodeCSV(csv))
       }

end</lang>

Sample run:

->csv csv.dat
C1,C2,C3,C4,C5
1,500,9,13,17
2,6,1000,14,18
3,7,11,1500,19
4,8,12,16,2000
->

Java

<lang java>import java.io.*; import java.awt.Point; import java.util.HashMap; import java.util.Scanner;

public class CSV {

   private HashMap<Point, String> _map = new HashMap<Point, String>();
   private int _cols;
   private int _rows;

   public void open(File file) throws FileNotFoundException, IOException {
       open(file, ',');
   }

   public void open(File file, char delimiter)
           throws FileNotFoundException, IOException {
       Scanner scanner = new Scanner(file);
       scanner.useDelimiter(Character.toString(delimiter));

       clear();

       while(scanner.hasNextLine()) {
           String[] values = scanner.nextLine().split(Character.toString(delimiter));

           int col = 0;
           for ( String value: values ) {
               _map.put(new Point(col, _rows), value);
               _cols = Math.max(_cols, ++col);
           }
           _rows++;
       }
       scanner.close();
   }

   public void save(File file) throws IOException {
       save(file, ',');
   }

   public void save(File file, char delimiter) throws IOException {
       FileWriter fw = new FileWriter(file);
       BufferedWriter bw = new BufferedWriter(fw);

       for (int row = 0; row < _rows; row++) {
           for (int col = 0; col < _cols; col++) {
               Point key = new Point(col, row);
               if (_map.containsKey(key)) {
                   bw.write(_map.get(key));
               }

               if ((col + 1) < _cols) {
                   bw.write(delimiter);
               }
           }
           bw.newLine();
       }
       bw.flush();
       bw.close();
   }

   public String get(int col, int row) {
       String val = "";
       Point key = new Point(col, row);
       if (_map.containsKey(key)) {
           val = _map.get(key);
       }
       return val;
   }

   public void put(int col, int row, String value) {
       _map.put(new Point(col, row), value);
       _cols = Math.max(_cols, col+1);
       _rows = Math.max(_rows, row+1);
   }

   public void clear() {
       _map.clear();
       _cols = 0;
       _rows = 0;
   }

   public int rows() {
       return _rows;
   }

   public int cols() {
       return _cols;
   }

   public static void main(String[] args) {
       try {
           CSV csv = new CSV();

           csv.open(new File("test_in.csv"));
           csv.put(0, 0, "Column0");
           csv.put(1, 1, "100");
           csv.put(2, 2, "200");
           csv.put(3, 3, "300");
           csv.put(4, 4, "400");
           csv.save(new File("test_out.csv"));
       } catch (Exception e) {
       }
   }

}</lang>

Output (in test_out.csv):

Column0,C2,C3,C4,C5
1,100,9,13,17
2,6,200,14,18
3,7,11,300,19
4,8,12,16,400

Mathematica's Import and Export functions support CSV files. <lang mathematica>iCSV=Import["test.csv"] ->{{"C1","C2","C3","C4","C5"},{1,5,9,13,17},{2,6,10,14,18},{3,7,11,15,19},{4,8,12,16,20}} iCSV1, 1 = Column0; iCSV2, 2 = 100; iCSV3, 3 = 200; iCSV4, 4 = 300; iCSV5, 5 = 400; iCSV2, 3 = 60; Export["test.csv",iCSV];</lang>

Output:

Column0,C2,C3,C4,C5
1,100,60,13,17
2,6,200,14,18
3,7,11,300,19
4,8,12,16,400

Perl 6

On the face of it this task is pretty simple. Especially given the sample CSV file and the total lack of specification of what changes to make to the file. Something like this would suffice. <lang perl6>my $csvfile = './whatever.csv'; my $fh = open($csvfile, :r); my @header = $fh.get.split(','); my @csv = map {[.split(',')]}, $fh.lines; close $fh;

my $out = open($csvfile, :w); $out.say((@header,'SUM').join(',')); $out.say((@$_, [+] @$_).join(',')) for @csv; close $out;</lang> But if your CSV file is at all complex you are better off using a CSV parsing module. <lang perl6>use Text::CSV; my $csvfile = './whatever.csv'; my @csv = Text::CSV.parse-file($file); modify(@csv); # do whatever; my $out = open($csvfile, :w); $out.say( (@$_).join(',') ) for @csv;</lang>

Python

<lang python>import fileinput

changerow, changecolumn, changevalue = 2, 4, '"Spam"'

with fileinput.input('csv_data_manipulation.csv', inplace=True) as f:

   for line in f:
       if fileinput.filelineno() == changerow:
           fields = line.rstrip().split(',')
           fields[changecolumn-1] = changevalue
           line = ','.join(fields) + '\n'
       print(line, end=)</lang>

Output:

After this the data file csv_data_manipulation.csv gets changed from that of the task to:

C1,C2,C3,C4,C5
1,5,9,"Spam",17
2,6,10,14,18
3,7,11,15,19
4,8,12,16,20

Racket

<lang racket>#lang racket (require (planet neil/csv:1:=7) net/url)

(define make-reader

 (make-csv-reader-maker
  '((separator-chars              #\,)
    (strip-leading-whitespace?  . #t)
    (strip-trailing-whitespace? . #t))))

(define (all-rows port)

 (define read-row (make-reader port))
 (define head (append (read-row) '("SUM")))
 (define rows (for/list ([row (in-producer read-row '())])
                (define xs (map string->number row))
                (append row (list (~a (apply + xs))))))
 (define (->string row) (string-join row "," #:after-last "\n"))
 (string-append* (map ->string (cons head rows))))</lang>

Example: <lang racket>(define csv-file

 "C1, C2, C3, C4, C5
   1,  5,  9, 13, 17
   2,  6, 10, 14, 18
   3,  7, 11, 15, 19
   4,  8, 12, 16, 20")

(display (all-rows (open-input-string csv-file)))</lang>

Output:

C1,C2,C3,C4,C5,SUM
1,5,9,13,17,45
2,6,10,14,18,50
3,7,11,15,19,55
4,8,12,16,20,60

REXX

<lang rexx>/* REXX ***************************************************************

extend in.csv to add a column containing the sum of the lines' elems
21.06.2013 Walter Pachl
- - - - /

csv='in.csv' Do i=1 By 1 While lines(csv)>0

 l=linein(csv)
 If i=1 Then
   l.i=l',SUM'
 Else Do
   ol=l
   sum=0
   Do While l<>
     Parse Var l e ',' l
     sum=sum+e
     End
   l.i=ol','sum
   End
 End

Call lineout csv 'erase' csv Do i=1 To i-1

 Call lineout csv,l.i
 End</lang>

Output:

C1,C2,C3,C4,C5,SUM
1,5,9,13,17,45
2,6,10,14,18,50
3,7,11,15,19,55
4,8,12,16,20,60

Run BASIC

<lang runbasic>csv$ = "C1,C2,C3,C4,C5 1,5,9,13,17 2,6,10,14,18 3,7,11,15,19 4,8,12,16,20 "

print csv$ dim csvData$(5,5)

for r = 1 to 5

 a$ = word$(csv$,r,chr$(13))
 for c = 1 to 5
   csvData$(r,c) = word$(a$,c,",")
 next c

next r

[loop] input "Row to change:";r input "Col to change;";c if r > 5 or c > 5 then

 print "Row ";r;" or Col ";c;" is greater than 5"
 goto [loop]

end if input "Change Row ";r;" can Col ";c;" from ";csvData$(r,c);" to ";d$ csvData$(r,c) = d$ for r = 1 to 5

 for c = 1 to 5
   print cma$;csvData$(r,c);
   cma$ = ","
  next c
  cma$ = ""
  print

next r</lang>

C1,C2,C3,C4,C5
1,5,9,13,17
2,6,10,14,18
3,7,11,15,19
4,8,12,16,20

Row to change:?4
Col to change;?4
Change Row 4 can Col 4 from 15 to ?99
C1,C2,C3,C4,C5
1,5,9,13,17
2,6,10,14,18
3,7,11,99,19
4,8,12,16,20

Tcl

Library: Tcllib (Package: struct::matrix)

Library: Tcllib (Package: csv)

<lang tcl>package require struct::matrix package require csv

proc addSumColumn {filename {title "SUM"}} {

   set m [struct::matrix]

   # Load the CSV in
   set f [open $filename]
   csv::read2matrix $f $m "," auto
   close $f

   # Add the column with the sums
   set sumcol [$m columns]
   $m add column $title
   for {set i 1} {$i < [$m rows]} {incr i} {

# Fill out a dummy value $m set cell $sumcol $i 0 $m set cell $sumcol $i [tcl::mathop::+ {*}[$m get row $i]]

   # Write the CSV out
   set f [open $filename w]
   csv::writematrix $m $f
   close $f

   $m destroy

}

addSumColumn "example.csv"</lang>

Output (in example.csv):

C1,C2,C3,C4,C5,SUM
1,5,9,13,17,45
2,6,10,14,18,50
3,7,11,15,19,55
4,8,12,16,20,60