Merge and aggregate datasets: Difference between revisions

Added FreeBASIC
m (minor edits)
(Added FreeBASIC)
 
(121 intermediate revisions by 25 users not shown)
Line 1:
{{task|Data Structures}}
{{task|Basic language learning}}Merge and aggregate datasets
 
Merge and aggregate datasets
 
 
;Task:
Merge and aggregate two datasets as provided in   '''.csv'''   files into a new resulingresulting dataset.
Use the appropriate methods and data structures depending on the programming language. Use the most common libraries only when built-in functionality is not sufficient.
Either load the data from the .csv files or create the required data structures hard-coded.
 
Use the appropriate methods and data structures depending on the programming language.
patients.csv file contents:
 
<nowiki>
Use the most common libraries only when built-in functionality is not sufficient.
 
 
;Note:
Either load the data from the &nbsp; '''.csv''' &nbsp; files or create the required data structures hard-coded.
 
 
'''patients.csv''' &nbsp; file contents:
<pre>
PATIENT_ID,LASTNAME
1001,Hopper
Line 14 ⟶ 24:
2002,Gosling
5005,Kurtz
</nowikipre>
 
 
visits.csv file contents:
'''visits.csv''' &nbsp; file contents:
<nowiki>PATIENT_ID,VISIT_DATE,SCORE
<pre>
PATIENT_ID,VISIT_DATE,SCORE
2002,2020-09-10,6.8
1001,2020-09-17,5.5
Line 26 ⟶ 38:
4004,2020-11-05,7.0
1001,2020-11-19,5.3
</nowikipre>
 
 
Create a resulting dataset in-memory or output it to screen or file, whichever is appropriate for the programming language at hand.
 
Merge and group per patient id and name, get the maximum visit date, get the sum of scores per patient and the average score per patient to get the resulting datasset:
Merge and group per patient id and last name, &nbsp; get the maximum visit date, &nbsp; and get the sum and average of the scores per patient to get the resulting dataset.
<nowiki>| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
 
 
Note that the visit date is purposefully provided as ISO format, &nbsp; so that it could also be processed as text and sorted alphabetically to determine the maximum date.
<pre>
| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
| 1001 | Hopper | 2020-11-19 | 17.4 | 5.80 |
| 2002 | Gosling | 2020-10-08 | 6.8 | 6.80 |
Line 36 ⟶ 54:
| 4004 | Wirth | 2020-11-05 | 15.4 | 7.70 |
| 5005 | Kurtz | | | |
</nowikipre>
 
 
;Note:
This task is aimed in particular at programming languages that are used in data science and data processing, such as F#, Python, R, SPSS, MATLAB etc.
 
 
;Related tasks:
* &nbsp; [[DataCSV data Structuresmanipulation]]
* [[CSV to HTML translation]]
* &nbsp; [[Database operations]]
* [[Read entire file]]
* [[Read a file line by line]]
<br><br>
 
=={{header|R11l}}==
{{trans|Python: Stdlib csv only}}
 
<syntaxhighlight lang="11l">V patients_csv =
<lang R># load datasets from file
‘PATIENT_ID,LASTNAME
1001,Hopper
4004,Wirth
3003,Kemeny
2002,Gosling
5005,Kurtz’
 
V visits_csv =
‘PATIENT_ID,VISIT_DATE,SCORE
2002,2020-09-10,6.8
1001,2020-09-17,5.5
4004,2020-09-24,8.4
2002,2020-10-08,
1001,,6.6
3003,2020-11-12,
4004,2020-11-05,7.0
1001,2020-11-19,5.3’
 
F csv2list(s)
[[String]] rows
L(row) s.split("\n")
rows [+]= row.split(‘,’)
R rows
 
V patients = csv2list(patients_csv)
V visits = csv2list(visits_csv)
 
V result = copy(patients)
result.sort_range(1..)
result[0].append(‘LAST_VISIT’)
V last = Dict(visits[1..], p_vis -> (p_vis[0], p_vis[1]))
L(record) 1 .< result.len
result[record].append(last.get(result[record][0], ‘’))
result[0] [+]= [‘SCORE_SUM’, ‘SCORE_AVG’]
 
V n = Dict(patients[1..], p -> (p[0], 0))
V tot = Dict(patients[1..], p -> (p[0], 0.0))
 
L(record) visits[1..]
V p = record[0]
V score = record[2]
I !score.empty
n[p]++
tot[p] += Float(score)
 
L(record) 1 .< result.len
V p = result[record][0]
I n[p] != 0
result[record] [+]= [‘#3.1’.format(tot[p]), ‘#2.2’.format(tot[p] / n[p])]
E
result[record] [+]= [‘’, ‘’]
 
L(record) result
print(‘| ’record.map(r -> r.center(10)).join(‘ | ’)‘ |’)</syntaxhighlight>
 
{{out}}
<pre>
| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
| 1001 | Hopper | 2020-11-19 | 17.4 | 5.80 |
| 2002 | Gosling | 2020-10-08 | 6.8 | 6.80 |
| 3003 | Kemeny | 2020-11-12 | | |
| 4004 | Wirth | 2020-11-05 | 15.4 | 7.70 |
| 5005 | Kurtz | | | |
</pre>
 
=={{header|AutoHotkey}}==
<syntaxhighlight lang="autohotkey">Merge_and_aggregate(patients, visits){
ID := [], LAST_VISIT := [], SCORE_SUM := [], VISIT := []
for i, line in StrSplit(patients, "`n", "`r"){
if (i=1)
continue
x := StrSplit(line, ",")
ID[x.1] := x.2
}
 
for i, line in StrSplit(visits, "`n", "`r"){
if (i=1)
continue
x := StrSplit(line, ",")
LAST_VISIT[x.1] := x.2 > LAST_VISIT[x.1] ? x.2 : LAST_VISIT[x.1]
SCORE_SUM[x.1] := (SCORE_SUM[x.1] ? SCORE_SUM[x.1] : 0) + (x.3 ? x.3 : 0)
if x.3
VISIT[x.1] := (VISIT[x.1] ? VISIT[x.1] : 0) + 1
}
 
output := "PATIENT_ID`tLASTNAME`tLAST_VISIT`tSCORE_SUM`tSCORE_AVG`n"
for id, name in ID
output .= ID "`t" name "`t" LAST_VISIT[id] "`t" SCORE_SUM[id] "`t" SCORE_SUM[id]/VISIT[id] "`n"
return output
}</syntaxhighlight>
Examples:<syntaxhighlight lang="autohotkey">patients =
(
PATIENT_ID,LASTNAME
1001,Hopper
4004,Wirth
3003,Kemeny
2002,Gosling
5005,Kurtz
)
 
visits =
(
PATIENT_ID,VISIT_DATE,SCORE
2002,2020-09-10,6.8
1001,2020-09-17,5.5
4004,2020-09-24,8.4
2002,2020-10-08,
1001,,6.6
3003,2020-11-12,
4004,2020-11-05,7.0
1001,2020-11-19,5.3
)
 
MsgBox % Merge_and_aggregate(patients, visits)
return</syntaxhighlight>
{{out}}
<pre>PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG
1001 Hopper 2020-11-19 17.400000 5.800000
2002 Gosling 2020-10-08 6.800000 6.800000
3003 Kemeny 2020-11-12 0
4004 Wirth 2020-11-05 15.400000 7.700000
5005 Kurtz</pre>
 
=={{header|AWK}}==
<syntaxhighlight lang="awk">
# syntax: GAWK -f MERGE_AND_AGGREGATE_DATASETS.AWK RC-PATIENTS.CSV RC-VISITS.CSV
# files may appear in any order
#
# sorting:
# PROCINFO["sorted_in"] is used by GAWK
# SORTTYPE is used by Thompson Automation's TAWK
#
{ # printf("%s %s\n",FILENAME,$0) # print input
split($0,arr,",")
if (FNR == 1) {
file = (arr[2] == "LASTNAME") ? "patients" : "visits"
next
}
patient_id_arr[key] = key = arr[1]
if (file == "patients") {
lastname_arr[key] = arr[2]
}
else if (file == "visits") {
if (arr[2] > visit_date_arr[key]) {
visit_date_arr[key] = arr[2]
}
if (arr[3] != "") {
score_arr[key] += arr[3]
score_count_arr[key]++
}
}
}
END {
print("")
PROCINFO["sorted_in"] = "@ind_str_asc" ; SORTTYPE = 1
fmt = "%-10s %-10s %-10s %9s %9s %6s\n"
printf(fmt,"patient_id","lastname","last_visit","score_sum","score_avg","scores")
for (i in patient_id_arr) {
avg = (score_count_arr[i] > 0) ? score_arr[i] / score_count_arr[i] : ""
printf(fmt,patient_id_arr[i],lastname_arr[i],visit_date_arr[i],score_arr[i],avg,score_count_arr[i]+0)
}
exit(0)
}
</syntaxhighlight>
{{out}}
<pre>
patient_id lastname last_visit score_sum score_avg scores
1001 Hopper 2020-11-19 17.4 5.8 3
2002 Gosling 2020-10-08 6.8 6.8 1
3003 Kemeny 2020-11-12 0
4004 Wirth 2020-11-05 15.4 7.7 2
5005 Kurtz 0
</pre>
=={{header|C++}}==
Uses C++20
<syntaxhighlight lang="cpp">#include <iostream>
#include <optional>
#include <ranges>
#include <string>
#include <vector>
 
using namespace std;
 
struct Patient
{
string ID;
string LastName;
};
 
struct Visit
{
string PatientID;
string Date;
optional<float> Score;
};
 
int main(void)
{
auto patients = vector<Patient> {
{"1001", "Hopper"},
{"4004", "Wirth"},
{"3003", "Kemeny"},
{"2002", "Gosling"},
{"5005", "Kurtz"}};
 
auto visits = vector<Visit> {
{"2002", "2020-09-10", 6.8},
{"1001", "2020-09-17", 5.5},
{"4004", "2020-09-24", 8.4},
{"2002", "2020-10-08", },
{"1001", "" , 6.6},
{"3003", "2020-11-12", },
{"4004", "2020-11-05", 7.0},
{"1001", "2020-11-19", 5.3}};
 
// sort the patients by ID
sort(patients.begin(), patients.end(),
[](const auto& a, const auto&b){ return a.ID < b.ID;});
 
cout << "| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |\n";
for(const auto& patient : patients)
{
// loop over all of the patients and determine the fields
string lastVisit;
float sum = 0;
int numScores = 0;
// use C++20 ranges to filter the visits by patients
auto patientFilter = [&patient](const Visit &v){return v.PatientID == patient.ID;};
for(const auto& visit : visits | views::filter( patientFilter ))
{
if(visit.Score)
{
sum += *visit.Score;
numScores++;
}
lastVisit = max(lastVisit, visit.Date);
}
// format the output
cout << "| " << patient.ID << " | ";
cout.width(8); cout << patient.LastName << " | ";
cout.width(10); cout << lastVisit << " | ";
if(numScores > 0)
{
cout.width(9); cout << sum << " | ";
cout.width(9); cout << (sum / float(numScores));
}
else cout << " | ";
cout << " |\n";
}
}</syntaxhighlight>
 
{{out}}
 
<pre>
| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
| 1001 | Hopper | 2020-11-19 | 17.4 | 5.8 |
| 2002 | Gosling | 2020-10-08 | 6.8 | 6.8 |
| 3003 | Kemeny | 2020-11-12 | | |
| 4004 | Wirth | 2020-11-05 | 15.4 | 7.7 |
| 5005 | Kurtz | | | |
</pre>
=={{header|C sharp}}==
<syntaxhighlight lang="csharp">using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Runtime.Serialization;
 
public static class MergeAndAggregateDatasets
{
public static void Main()
{
string patientsCsv = @"
PATIENT_ID,LASTNAME
1001,Hopper
4004,Wirth
3003,Kemeny
2002,Gosling
5005,Kurtz";
 
string visitsCsv = @"
PATIENT_ID,VISIT_DATE,SCORE
2002,2020-09-10,6.8
1001,2020-09-17,5.5
4004,2020-09-24,8.4
2002,2020-10-08,
1001,,6.6
3003,2020-11-12,
4004,2020-11-05,7.0
1001,2020-11-19,5.3";
 
string format = "yyyy-MM-dd";
var formatProvider = new DateTimeFormat(format).FormatProvider;
 
var patients = ParseCsv(
patientsCsv.Split(Environment.NewLine, StringSplitOptions.RemoveEmptyEntries),
line => (PatientId: int.Parse(line[0]), LastName: line[1]));
 
var visits = ParseCsv(
visitsCsv.Split(Environment.NewLine, StringSplitOptions.RemoveEmptyEntries),
line => (
PatientId: int.Parse(line[0]),
VisitDate: DateTime.TryParse(line[1], formatProvider, DateTimeStyles.None, out var date) ? date : default(DateTime?),
Score: double.TryParse(line[2], out double score) ? score : default(double?)
)
);
 
var results =
patients.GroupJoin(visits,
p => p.PatientId,
v => v.PatientId,
(p, vs) => (
p.PatientId,
p.LastName,
LastVisit: vs.Max(v => v.VisitDate),
ScoreSum: vs.Sum(v => v.Score),
ScoreAvg: vs.Average(v => v.Score)
)
).OrderBy(r => r.PatientId);
 
Console.WriteLine("| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |");
foreach (var r in results) {
Console.WriteLine($"| {r.PatientId,-10} | {r.LastName,-8} | {r.LastVisit?.ToString(format) ?? "",-10} | {r.ScoreSum,9} | {r.ScoreAvg,9} |");
}
}
 
private static IEnumerable<T> ParseCsv<T>(string[] contents, Func<string[], T> constructor)
{
for (int i = 1; i < contents.Length; i++) {
var line = contents[i].Split(',');
yield return constructor(line);
}
}
 
}</syntaxhighlight>
{{out}}
<pre>
| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
| 1001 | Hopper | 2020-11-19 | 17.4 | 5.8 |
| 2002 | Gosling | 2020-10-08 | 6.8 | 6.8 |
| 3003 | Kemeny | 2020-11-12 | 0 | |
| 4004 | Wirth | 2020-11-05 | 15.4 | 7.7 |
| 5005 | Kurtz | | 0 | |</pre>
 
=={{header|Datalog}}==
The following implementation is for Souffle. Souffle does not currently include dates and times in the base distribution, but it is straightforward to use C timestamps:
<syntaxhighlight lang="c">// datetime.cpp
#include <ctime>
#include <cstdint>
extern "C" {
int64_t from date(const char* string) {
struct tm tmInfo = {0};
strptime(string, "%Y-%m-%d", &tmInfo);
return mktime(&tmInfo); // localtime
}
}</syntaxhighlight>
 
Rather than combine the summaries, the date and score summaries have been presented separately.
<syntaxhighlight lang="prolog">/* rosetta.dl */
#define NaN to_float("NaN")
.functor from_date(date:symbol) : number
.decl Patient(id:number, lastname:symbol)
.decl Visit(id:number, date:symbol, score:float)
.decl SummaryDates(id:number, lastname:symbol, last_date:symbol)
.decl SummaryScores(id:number, lastname:symbol, score_sum:float, score_mean:float)
.decl MissingDates(x:number)
Patient(1001,"Hopper").
Patient(4004,"Wirth").
Patient(3003,"Kemeny").
Patient(2002,"Gosling").
Patient(5005,"Kurtz").
 
Visit(2002,"2020-09-10",6.8).
Visit(1001,"2020-09-17",5.5).
Visit(4004,"2020-09-24",8.4).
Visit(2002,"2020-10-08",NaN).
Visit(1001,"",6.6).
Visit(3003,"2020-11-12",NaN).
Visit(4004,"2020-11-05",7.0).
Visit(1001,"2020-11-19",5.3).
 
MissingDates(@from_date("")) :- true.
 
SummaryDates(id, lastname, last_date) :-
Patient(id,lastname),
last_timestamp = max ts: {Visit(id, date, _), ts = @from_date(date), !MissingDates(ts)},
Visit(id, last_date, _), last_timestamp = @from_date(last_date).
SummaryScores(id, lastname, score_sum, score_mean) :-
Patient(id,lastname),
score_sum = sum score: {Visit(id, _, score), score != NaN},
score_mean = mean score: {Visit(id, _, score), score != NaN}.
.output SummaryDates
.output SummaryScores</syntaxhighlight>
 
Then this is called using:
<syntaxhighlight lang="bash">g++ -shared -fPIC datetime.cpp -o libfunctors.so
souffle -D- rosetta.dl</syntaxhighlight>
{{output}}
<pre>---------------
SummaryScores
id lastname score_sum score_mean
===============
1001 Hopper 17.399999999999999 5.7999999999999998
2002 Gosling 6.7999999999999998 6.7999999999999998
4004 Wirth 15.4 7.7000000000000002
===============
---------------
SummaryDates
id lastname last_date
===============
1001 Hopper 2020-11-19
2002 Gosling 2020-10-08
3003 Kemeny 2020-11-12
4004 Wirth 2020-11-05
===============</pre>
 
=={{header|FreeBASIC}}==
{{trans|C++}}
<syntaxhighlight lang="vbnet">Type Patient
ID As String
LastName As String
End Type
 
Type Visit
PatientID As String
Fecha As String
Score As Single
HasScore As Integer
End Type
 
Dim As Patient patients(5)
Dim As Visit visits(8)
 
patients(1).ID = "1001": patients(1).LastName = "Hopper"
patients(2).ID = "4004": patients(2).LastName = "Wirth"
patients(3).ID = "3003": patients(3).LastName = "Kemeny"
patients(4).ID = "2002": patients(4).LastName = "Gosling"
patients(5).ID = "5005": patients(5).LastName = "Kurtz"
 
visits(1).PatientID = "2002": visits(1).Fecha = "2020-09-10": visits(1).Score = 6.8: visits(1).HasScore = -1
visits(2).PatientID = "1001": visits(2).Fecha = "2020-09-17": visits(2).Score = 5.5: visits(2).HasScore = -1
visits(3).PatientID = "4004": visits(3).Fecha = "2020-09-24": visits(3).Score = 8.4: visits(3).HasScore = -1
visits(4).PatientID = "2002": visits(4).Fecha = "2020-10-08": visits(4).HasScore = 0
visits(5).PatientID = "1001": visits(5).Fecha = "" : visits(5).Score = 6.6: visits(5).HasScore = -1
visits(6).PatientID = "3003": visits(6).Fecha = "2020-11-12": visits(6).HasScore = 0
visits(7).PatientID = "4004": visits(7).Fecha = "2020-11-05": visits(7).Score = 7.0: visits(7).HasScore = -1
visits(8).PatientID = "1001": visits(8).Fecha = "2020-11-19": visits(8).Score = 5.3: visits(8).HasScore = -1
 
Print "| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |"
For i As Integer = 1 To 5
Dim As String lastVisit = ""
Dim As Single sum = 0
Dim As Integer numScores = 0
For j As Integer = 1 To 8
If patients(i).ID = visits(j).PatientID Then
If visits(j).HasScore Then
sum += visits(j).Score
numScores += 1
End If
If visits(j).Fecha > lastVisit Then
lastVisit = visits(j).Fecha
End If
End If
Next j
Print "| "; patients(i).ID; " | ";
Print Using "\ \ | \ \ | "; patients(i).LastName; lastVisit;
If numScores > 0 Then
Print Using "#######.# | #######.#"; sum; (sum / Csng(numScores));
Else
Print " | ";
End If
Print " |"
Next i
 
Sleep</syntaxhighlight>
{{out}}
<pre>| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
| 1001 | Hopper | 2020-11-19 | 17.4 | 5.8 |
| 4004 | Wirth | 2020-11-05 | 15.4 | 7.7 |
| 3003 | Kemeny | 2020-11-12 | | |
| 2002 | Gosling | 2020-10-08 | 6.8 | 6.8 |
| 5005 | Kurtz | | | |</pre>
 
=={{header|F_Sharp|F#}}==
Note that the scores are right justified to copy the task description. It would be more natural to leave them right justified.
<syntaxhighlight lang="fsharp">
// Merge and aggregate datasets. Nigel Galloway: January 6th., 2021
let rFile(fName)=seq{use n=System.IO.File.OpenText(fName)
n.ReadLine() |> ignore
while not n.EndOfStream do yield n.ReadLine().Split [|','|]}
let N=rFile("file1.txt") |> Seq.sort
let G=rFile("file2.txt") |> Seq.groupBy(fun n->n.[0]) |> Map.ofSeq
let fN n i g e l=printfn "| %-10s | %-8s | %10s | %-9s | %-9s |" n i g e l
let fG n g=let z=G.[n]|>Seq.sumBy(fun n->try float n.[2] with :? System.FormatException->0.0)
fN n g (G.[n]|>Seq.sort|>Seq.last).[1] (if z=0.0 then "" else string z) (if z=0.0 then "" else string(z/(float(Seq.length G.[n]))))
</syntaxhighlight>
{{out}}
<pre>
printfn "| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |"
N|>Seq.iter(fun n->match G.ContainsKey n.[0] with true->fG n.[0] n.[1] |_->fN n.[0] n.[1] "" "" "")
 
| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
| 1001 | Hopper | 2020-11-19 | 17.4 | 5.8 |
| 2002 | Gosling | 2020-10-08 | 6.8 | 3.4 |
| 3003 | Kemeny | 2020-11-12 | | |
| 4004 | Wirth | 2020-11-05 | 15.4 | 7.7 |
| 5005 | Kurtz | | | |
</pre>
=={{header|Go}}==
{{trans|Wren}}
<syntaxhighlight lang="go">package main
 
import (
"fmt"
"math"
"sort"
)
 
type Patient struct {
id int
lastName string
}
 
// maps an id to a lastname
var patientDir = make(map[int]string)
 
// maintains a sorted list of ids
var patientIds []int
 
func patientNew(id int, lastName string) Patient {
patientDir[id] = lastName
patientIds = append(patientIds, id)
sort.Ints(patientIds)
return Patient{id, lastName}
}
 
type DS struct {
dates []string
scores []float64
}
 
type Visit struct {
id int
date string
score float64
}
 
// maps an id to lists of dates and scores
var visitDir = make(map[int]DS)
 
func visitNew(id int, date string, score float64) Visit {
if date == "" {
date = "0000-00-00"
}
v, ok := visitDir[id]
if ok {
v.dates = append(v.dates, date)
v.scores = append(v.scores, score)
visitDir[id] = DS{v.dates, v.scores}
} else {
visitDir[id] = DS{[]string{date}, []float64{score}}
}
return Visit{id, date, score}
}
 
type Merge struct{ id int }
 
func (m Merge) lastName() string { return patientDir[m.id] }
func (m Merge) dates() []string { return visitDir[m.id].dates }
func (m Merge) scores() []float64 { return visitDir[m.id].scores }
 
func (m Merge) lastVisit() string {
dates := m.dates()
dates2 := make([]string, len(dates))
copy(dates2, dates)
sort.Strings(dates2)
return dates2[len(dates2)-1]
}
 
func (m Merge) scoreSum() float64 {
sum := 0.0
for _, score := range m.scores() {
if score != -1 {
sum += score
}
}
return sum
}
 
func (m Merge) scoreAvg() float64 {
count := 0
for _, score := range m.scores() {
if score != -1 {
count++
}
}
return m.scoreSum() / float64(count)
}
 
func mergePrint(merges []Merge) {
fmt.Println("| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |")
f := "| %d | %-7s | %s | %4s | %4s |\n"
for _, m := range merges {
_, ok := visitDir[m.id]
if ok {
lv := m.lastVisit()
if lv == "0000-00-00" {
lv = " "
}
scoreSum := m.scoreSum()
ss := fmt.Sprintf("%4.1f", scoreSum)
if scoreSum == 0 {
ss = " "
}
scoreAvg := m.scoreAvg()
sa := " "
if !math.IsNaN(scoreAvg) {
sa = fmt.Sprintf("%4.2f", scoreAvg)
}
fmt.Printf(f, m.id, m.lastName(), lv, ss, sa)
} else {
fmt.Printf(f, m.id, m.lastName(), " ", " ", " ")
}
}
}
 
func main() {
patientNew(1001, "Hopper")
patientNew(4004, "Wirth")
patientNew(3003, "Kemeny")
patientNew(2002, "Gosling")
patientNew(5005, "Kurtz")
 
visitNew(2002, "2020-09-10", 6.8)
visitNew(1001, "2020-09-17", 5.5)
visitNew(4004, "2020-09-24", 8.4)
visitNew(2002, "2020-10-08", -1) // -1 signifies no score
visitNew(1001, "", 6.6) // "" signifies no date
visitNew(3003, "2020-11-12", -1)
visitNew(4004, "2020-11-05", 7.0)
visitNew(1001, "2020-11-19", 5.3)
 
merges := make([]Merge, len(patientIds))
for i, id := range patientIds {
merges[i] = Merge{id}
}
mergePrint(merges)
}</syntaxhighlight>
 
{{out}}
<pre>
| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
| 1001 | Hopper | 2020-11-19 | 17.4 | 5.80 |
| 2002 | Gosling | 2020-10-08 | 6.8 | 6.80 |
| 3003 | Kemeny | 2020-11-12 | | |
| 4004 | Wirth | 2020-11-05 | 15.4 | 7.70 |
| 5005 | Kurtz | | | |
</pre>
 
=={{header|Haskell}}==
 
===Reading and merging===
 
Merging of fields and databases is defined as a monoid operation for corresponding types.
 
<syntaxhighlight lang="haskell">import Data.List
import Data.Maybe
import System.IO (readFile)
import Text.Read (readMaybe)
import Control.Applicative ((<|>))
 
------------------------------------------------------------
 
newtype DB = DB { entries :: [Patient] }
deriving Show
 
instance Semigroup DB where
DB a <> DB b = normalize $ a <> b
 
instance Monoid DB where
mempty = DB []
 
normalize :: [Patient] -> DB
normalize = DB
. map mconcat
. groupBy (\x y -> pid x == pid y)
. sortOn pid
------------------------------------------------------------
 
data Patient = Patient { pid :: String
, name :: Maybe String
, visits :: [String]
, scores :: [Float] }
deriving Show
 
instance Semigroup Patient where
Patient p1 n1 v1 s1 <> Patient p2 n2 v2 s2 =
Patient (fromJust $ Just p1 <|> Just p2)
(n1 <|> n2)
(v1 <|> v2)
(s1 <|> s2)
 
instance Monoid Patient where
mempty = Patient mempty mempty mempty mempty
------------------------------------------------------------
 
readDB :: String -> DB
readDB = normalize
. mapMaybe readPatient
. readCSV
 
readPatient r = do
i <- lookup "PATIENT_ID" r
let n = lookup "LASTNAME" r
let d = lookup "VISIT_DATE" r >>= readDate
let s = lookup "SCORE" r >>= readMaybe
return $ Patient i n (maybeToList d) (maybeToList s)
where
readDate [] = Nothing
readDate d = Just d
 
readCSV :: String -> [(String, String)]
readCSV txt = zip header <$> body
where
header:body = splitBy ',' <$> lines txt
splitBy ch = unfoldr go
where
go [] = Nothing
go s = Just $ drop 1 <$> span (/= ch) s</syntaxhighlight>
 
<pre>let patients = readDB <$> readFile "patients.csv"
*Main> let visits = readDB <$> readFile "visits.csv"
 
*Main> mapM_ print . entries =<< patients
Patient {pid = "1001", name = Just "Hopper", visits = [], scores = []}
Patient {pid = "2002", name = Just "Gosling", visits = [], scores = []}
Patient {pid = "3003", name = Just "Kemeny", visits = [], scores = []}
Patient {pid = "4004", name = Just "Wirth", visits = [], scores = []}
Patient {pid = "5005", name = Just "Kurtz", visits = [], scores = []}
 
*Main> mapM_ print . entries =<< visits
Patient {pid = "1001", name = Nothing, visits = ["2020-09-17","2020-11-19"], scores = [5.3,6.6,5.5]}
Patient {pid = "2002", name = Nothing, visits = ["2020-09-10","2020-10-08"], scores = [6.8]}
Patient {pid = "3003", name = Nothing, visits = ["2020-11-12"], scores = []}
Patient {pid = "4004", name = Nothing, visits = ["2020-09-24","2020-11-05"], scores = [7.0,8.4]}
 
*Main> mapM_ print . entries =<< patients <> visits
Patient {pid = "1001", name = Just "Hopper", visits = ["2020-09-17","2020-11-19"], scores = [5.3,6.6,5.5]}
Patient {pid = "2002", name = Just "Gosling", visits = ["2020-09-10","2020-10-08"], scores = [6.8]}
Patient {pid = "3003", name = Just "Kemeny", visits = ["2020-11-12"], scores = []}
Patient {pid = "4004", name = Just "Wirth", visits = ["2020-09-24","2020-11-05"], scores = [7.0,8.4]}
Patient {pid = "5005", name = Just "Kurtz", visits = [], scores = []}</pre>
 
===Pretty tabulation===
 
<syntaxhighlight lang="haskell">
tabulateDB (DB ps) header cols = intercalate "|" <$> body
where
body = transpose $ zipWith pad width table
table = transpose $ header : map showPatient ps
showPatient p = sequence cols p
width = maximum . map length <$> table
pad n col = (' ' :) . take (n+1) . (++ repeat ' ') <$> col
 
main = do
a <- readDB <$> readFile "patients.csv"
b <- readDB <$> readFile "visits.csv"
mapM_ putStrLn $ tabulateDB (a <> b) header fields
where
header = [ "PATIENT_ID", "LASTNAME", "VISIT_DATE"
, "SCORES SUM","SCORES AVG"]
fields = [ pid
, fromMaybe [] . name
, \p -> case visits p of {[] -> []; l -> last l}
, \p -> case scores p of {[] -> []; s -> show (sum s)}
, \p -> case scores p of {[] -> []; s -> show (mean s)} ]
 
mean lst = sum lst / genericLength lst</syntaxhighlight>
 
<pre>*Main> main
PATIENT_ID | LASTNAME | VISIT_DATE | SCORES SUM | SCORES AVG
1001 | Hopper | 2020-11-19 | 17.4 | 5.7999997
2002 | Gosling | 2020-10-08 | 6.8 | 6.8
3003 | Kemeny | 2020-11-12 | |
4004 | Wirth | 2020-11-05 | 15.4 | 7.7
5005 | Kurtz | | | </pre>
 
=={{header|Harbour}}==
 
Harbour does not have special values for NA or NaN, and missing numerical values are represented as zeros. In the following, we have used -999 for missing scores.
 
<syntaxhighlight lang="xbase">
#xcommand INSERT INTO <table> ( <uField1>[, <uFieldN> ] ) VALUE ( <uVal1>[, <uValN> ] ) => ;
<table>->(dbAppend()); <table>-><uField1> := <uVal1> [; <table>-><uFieldN> := <uValN>]
#xcommand INSERT INTO <table> ( <uFieldList,...> ) VALUES ( <uValList1,...> ) [, ( <uValListN,...> )] => ;
INSERT INTO <table> ( <uFieldList> ) VALUE ( <uValList1> ) ;
[; INSERT INTO <table> ( <uFieldList> ) VALUE ( <uValListN> )]
&& Singular cases (so we can use VALUES for all instances)
#xcommand INSERT INTO <table> (<uField>) VALUE (<uVal>) => ;
<table>->(dbAppend()); <table>-><uField> := <uVal>
#xcommand INSERT INTO <table> ( <uFieldList,...> ) VALUES ( <uValList,...> ) => ;
INSERT INTO <table> ( <uFieldList> ) VALUE ( <uValList> )
 
PROCEDURE Main()
LOCAL pStruct, vStruct, rStruct, xCurId, aAgg
 
SET DATE FORMAT "yyyy-mm-dd"
 
&& create and populate the patient table
pStruct := {{"patient_id", "n", 8, 0}, {"lastname", "c", 10, 0 }}
dbCreate( "patient", pStruct,, .T., "patient" )
INSERT INTO patient (patient_id, lastname) VALUES (1001, "Hopper"), (4004, "Wirth"), ;
(3003, "Kemeny"), (2002, "Gosling"), (5005, "Kurtz")
INDEX ON patient_id TO pat_id
&& create and populate the visit table
vStruct := {{"patient_id", "n", 8, 0}, {"visit_date", "d", 10, 0}, {"score", "n", 8, 1}}
dbCreate( "visit", vStruct,, .T., "visit" )
INSERT INTO visit (patient_id, visit_date, score) VALUES (2002, ctod("2020-09-10"), 6.8), ;
(1001, ctod("2020-09-17"), 5.5), (4004, ctod("2020-09-24"), 8.4), ;
(2002, ctod("2020-10-08"), -999), (1001, ctod("1900-01-01"), 6.6), ;
(3003, ctod("2020-11-12"), -999), (4004, ctod("2020-11-05"), 7.0), ;
(1001, ctod("2020-11-19"), 5.3)
INDEX ON patient_id TO visit_id
 
&& create the result table
rStruct := { {"patient_id", "n", 8, 0}, ;
{"n", "i", 8, 0}, {"sum_score", "n", 8, 1}, ;
{"avg_score", "n", 8, 1}, {"max_date", "d", 10, 0}}
dbCreate("report", rStruct,, .T., "report")
 
SELECT visit
DO WHILE ! Eof()
xCurId := patient_id && grouping variable
aAgg := {0, 0, 0.0, ctod("1900-01-01")} && initial values
DO WHILE ! Eof() .AND. xCurId == patient_id
aAgg := {1+aAgg[1], iif(score==-999,aAgg[2],1+aAgg[2]), ;
iif(score==-999, aAgg[3], score+aAgg[3]), max(visit_date, aAgg[4])} && update
SKIP
ENDDO
INSERT INTO report (patient_id, n, sum_score, avg_score, max_date) ;
VALUES (xCurId, aAgg[1], aAgg[3], aAgg[3]/aAgg[2], aAgg[4])
ENDDO
 
SELECT report
INDEX ON patient_id TO report_id
 
SELECT patient
SET RELATION TO patient_id INTO report
? "NUM", "PATIENT_ID", "LASTNAME", "N", "SUM_SCORE", "AVG_SCORE", "MAX_DATE"
LIST patient_id, lastname, report->n, report->sum_score, report->avg_score, report->max_date
 
RETURN</syntaxhighlight>
 
With output:
 
<pre>NUM PATIENT_ID LASTNAME N SUM_SCORE AVG_SCORE MAX_DATE
1 1001 Hopper 3 17.4 5.8 2020-11-19
4 2002 Gosling 2 6.8 6.8 2020-10-08
3 3003 Kemeny 1 0.0 0.0 2020-11-12
2 4004 Wirth 2 15.4 7.7 2020-11-05
5 5005 Kurtz 0 0.0 0.0 - - </pre>
 
=={{header|J}}==
 
One approach here would be to use [[j:Jd/Index|Jd]]
 
In other words, we can set things up like this:
<syntaxhighlight lang="j">NB. setup:
require'jd pacman'
load JDP,'tools/csv_load.ijs'
F=: jpath '~temp/rosettacode/example/CSV'
jdcreatefolder_jd_ CSVFOLDER=: F
 
assert 0<{{)n
PATIENTID,LASTNAME
1001,Hopper
4004,Wirth
3003,Kemeny
2002,Gosling
5005,Kurtz
}} fwrite F,'patients.csv'
 
assert 0<{{)n
PATIENTID,VISIT_DATE,SCORE
2002,2020-09-10,6.8
1001,2020-09-17,5.5
4004,2020-09-24,8.4
2002,2020-10-08,
1001,,6.6
3003,2020-11-12,
4004,2020-11-05,7.0
1001,2020-11-19,5.3
}} fwrite F,'visits.csv'
 
csvprepare 'patients';F,'patients.csv'
csvprepare 'visits';F,'visits.csv'
 
csvload 'patients';1
csvload 'visits';1
 
jd'ref patients PATIENTID visits PATIENTID'</syntaxhighlight>
 
And, then we can run our query:
 
<syntaxhighlight lang="j">require'jd'
 
echo jd {{)n
reads
PATIENT_ID: first p.PATIENTID,
LASTNAME: first p.LASTNAME,
LAST_VISIT: max v.VISIT_DATE,
SCORE_SUM: sum v.SCORE,
SCORE_AVG: avg v.SCORE
by
p.PATIENTID
from
p:patients,
v:p.visits
}} -.LF</syntaxhighlight>
 
Which displays this result:
<pre>┌───────────┬──────────┬────────┬──────────┬─────────┬─────────┐
│p.PATIENTID│PATIENT_ID│LASTNAME│LAST_VISIT│SCORE_SUM│SCORE_AVG│
├───────────┼──────────┼────────┼──────────┼─────────┼─────────┤
│1001 │1001 │Hopper │2020-09-17│5.5 │5.5 │
│4004 │4004 │Wirth │2020-09-24│8.4 │8.4 │
│3003 │3003 │Kemeny │2020-11-12│ __ │ __ │
│2002 │2002 │Gosling │2020-09-10│6.8 │6.8 │
│5005 │5005 │Kurtz │? │ 0 │ 0 │
└───────────┴──────────┴────────┴──────────┴─────────┴─────────┘</pre>
 
Another approach would be to use J's csv library:
 
<syntaxhighlight lang="j">require'csv'
patients=: fixcsv {{)n
PATIENTID,LASTNAME
1001,Hopper
4004,Wirth
3003,Kemeny
2002,Gosling
5005,Kurtz
}}
 
visits=: fixcsv {{)n
PATIENTID,VISIT_DATE,SCORE
2002,2020-09-10,6.8
1001,2020-09-17,5.5
4004,2020-09-24,8.4
2002,2020-10-08,
1001,,6.6
3003,2020-11-12,
4004,2020-11-05,7.0
1001,2020-11-19,5.3
}}
 
task=: {{
P=. <@:>"1|:/:~}.patients
V=. <@:>"1|:/:~}.visits
id=. 0 {:: P
nm=. 1 {:: P
sel1=. (0 {:: P) e. 0 {:: V
sel2=. (~.0 {:: V) e. 0 {:: P NB. unnecessary for this example
exp=. sel1 #inv sel2 # ]
agg=. /.(&.:".)
vdt=. exp (0 {:: V) {:/. 1 {:: V
sum=. exp ":,.(0 {:: V) +//. 0". 2 {:: V
avg=. exp ":,.(0 {:: V) (+/%#)/. 0". 2 {:: V
labels=. ;:'PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG'
labels,:id;nm;vdt;sum;avg
}}</syntaxhighlight>
 
Here:
<syntaxhighlight lang="j"> task''
┌──────────┬────────┬──────────┬─────────┬─────────┐
│PATIENT_ID│LASTNAME│LAST_VISIT│SCORE_SUM│SCORE_AVG│
├──────────┼────────┼──────────┼─────────┼─────────┤
│1001 │Hopper │2020-11-19│17.4 │5.8 │
│2002 │Gosling │2020-10-08│ 6.8 │3.4 │
│3003 │Kemeny │2020-11-12│ 0 │ 0 │
│4004 │Wirth │2020-11-05│15.4 │7.7 │
│5005 │Kurtz │ │ │ │
└──────────┴────────┴──────────┴─────────┴─────────┘</syntaxhighlight>
 
If the empty score in visits was a display concern, we might instead do it this way:
 
<syntaxhighlight lang="j">task=: {{
P=. <@:>"1|:/:~}.patients
V=. <@:>"1|:/:~}.visits
id=. 0 {:: P
nm=. 1 {:: P
sel1=. (0 {:: P) e. 0 {:: V
sel2=. (~.0 {:: V) e. 0 {:: P NB. unnecessary for this example
exp=. sel1 #inv sel2 # ]
agg=. /.(&.:".)
vdt=. exp (0 {:: V) {:/. 1 {:: V
sel3=. (0 {:: V) +.//. 2 *@#@{::"1 }.visits
exp2=: [:exp sel3 #inv sel3 #]
sum=. exp2 ":,.(0 {:: V) +//. 0". 2 {:: V
avg=. exp2 ":,.(0 {:: V) (+/%#)/. 0". 2 {:: V
labels=. ;:'PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG'
labels,:id;nm;vdt;sum;avg
}}</syntaxhighlight>
 
Which gives us:
 
<syntaxhighlight lang="j"> task''
┌──────────┬────────┬──────────┬─────────┬─────────┐
│PATIENT_ID│LASTNAME│LAST_VISIT│SCORE_SUM│SCORE_AVG│
├──────────┼────────┼──────────┼─────────┼─────────┤
│1001 │Hopper │2020-11-19│17.4 │5.8 │
│2002 │Gosling │2020-10-08│ 6.8 │3.4 │
│3003 │Kemeny │2020-11-12│ │ │
│4004 │Wirth │2020-11-05│15.4 │7.7 │
│5005 │Kurtz │ │ │ │
└──────────┴────────┴──────────┴─────────┴─────────┘</syntaxhighlight>
 
=={{header|Java}}==
<syntaxhighlight lang="java">
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.DoubleSummaryStatistics;
import java.util.List;
 
public final class MergeAndAggregateDatasets {
 
public static void main(String[] args) {
List<Patient> patients = Arrays.asList(
new Patient("1001", "Hopper"),
new Patient("4004", "Wirth"),
new Patient("3003", "Kemeny"),
new Patient("2002", "Gosling"),
new Patient("5005", "Kurtz") );
 
List<Visit> visits = Arrays.asList(
new Visit("2002", "2020-09-10", 6.8),
new Visit("1001", "2020-09-17", 5.5),
new Visit("4004", "2020-09-24", 8.4),
new Visit("2002", "2020-10-08", null),
new Visit("1001", "" , 6.6),
new Visit("3003", "2020-11-12", null),
new Visit("4004", "2020-11-05", 7.0),
new Visit("1001", "2020-11-19", 5.3) );
Collections.sort(patients, Comparator.comparing(Patient::patientID));
System.out.println("| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |");
for ( Patient patient : patients ) {
List<Visit> patientVisits = visits.stream().filter( v -> v.visitID == patient.patientID() ).toList();
String lastVisit = patientVisits.stream()
.map( v -> v.visitDate ).max(Comparator.naturalOrder()).orElseGet( () -> " None " );
DoubleSummaryStatistics statistics = patientVisits.stream()
.filter( v -> v.score != null ).mapToDouble(Visit::score).summaryStatistics();
double scoreSum = statistics.getSum();
double scoreAverage = statistics.getAverage();
String patientDetails = String.format("%12s%11s%13s%12.2f%12.2f",
patient.patientID, patient.lastName, lastVisit, scoreSum, scoreAverage);
System.out.println(patientDetails);
}
 
private static record Patient(String patientID, String lastName) {};
private static record Visit(String visitID, String visitDate, Double score) {};
 
}
 
}
</syntaxhighlight>
{{ out }}
<pre>
| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
1001 Hopper 2020-11-19 17.40 5.80
2002 Gosling 2020-10-08 6.80 6.80
3003 Kemeny 2020-11-12 0.00 0.00
4004 Wirth 2020-11-05 15.40 7.70
5005 Kurtz None 0.00 0.00
</pre>
 
=={{header|jq}}==
{{works with|jq}}
'''Works with gojq, the Go implementation of jq'''
 
In the context of jq, a relational dataset "table" is naturally represented as an array of JSON objects, each representing a row in the table.
When displaying such a table, we will only show the constituent rows.
Null values will be represented by JSON's `null`.
 
'''Ingesting CSV data'''
<syntaxhighlight lang="jq">
# objectify/1 takes an array of atomic values as inputs, and packages
# these into an object with keys specified by the "headers" array and
# values obtained by trimming string values, replacing empty strings
# by null, and converting strings to numbers if possible.
def objectify(headers):
def tonumberq: tonumber? // .;
def trimq: if type == "string" then sub("^ +";"") | sub(" +$";"") else . end;
def tonullq: if . == "" then null else . end;
. as $in
| reduce range(0; headers|length) as $i
({}; .[headers[$i]] = ($in[$i] | trimq | tonumberq | tonullq) );
 
def csv2jsonHelper:
.[0] as $headers
| reduce (.[1:][] | select(length > 0) ) as $row
([]; . + [ $row|objectify($headers) ]);
 
</syntaxhighlight>
'''Aggregation functions'''
<syntaxhighlight lang="jq">
# output {LAST_VISIT}
def LAST_VISIT($patient_id):
{LAST_VISIT: (map(select( .PATIENT_ID == $patient_id).VISIT_DATE) | max)};
 
# output {SCORE_SUM, SCORE_AVG}
def SCORE_SUMMARY($patient_id):
map(select( .PATIENT_ID == $patient_id).SCORE)
| {SCORE_SUM: add, count: length}
| {SCORE_SUM, SCORE_AVG: (if .SCORE_SUM and .count > 0 then .SCORE_SUM/.count else null end)};
</syntaxhighlight>'''The task'''<syntaxhighlight lang="jq">
# Read the two tables:
INDEX($patients | [splits("\n")] | map(split(",")) | csv2jsonHelper[]; .PATIENT_ID) as $patients
| ($visits | [splits("\n")] | map(split(",")) | csv2jsonHelper) as $visits
# Construct the new table:
| $visits
| map(.PATIENT_ID as $PATIENT_ID
| {$PATIENT_ID} +
($visits | {LASTNAME: $patients[$PATIENT_ID|tostring]} + LAST_VISIT($PATIENT_ID) + SCORE_SUMMARY($PATIENT_ID)))
# ... but display it as a sequence of JSON objects
| .[]
</syntaxhighlight>
'''Invocation'''
 
jq -Rnc --rawfile patients patients.csv --rawfile visits visits.csv -f program.jq
{{out}}
<pre>
{"PATIENT_ID":2002,"LASTNAME":{"PATIENT_ID":2002,"LASTNAME":"Gosling"},"LAST_VISIT":"2020-10-08","SCORE_SUM":6.8,"SCORE_AVG":3.4}
{"PATIENT_ID":1001,"LASTNAME":{"PATIENT_ID":1001,"LASTNAME":"Hopper"},"LAST_VISIT":"2020-11-19","SCORE_SUM":17.4,"SCORE_AVG":5.8}
{"PATIENT_ID":4004,"LASTNAME":{"PATIENT_ID":4004,"LASTNAME":"Wirth"},"LAST_VISIT":"2020-11-05","SCORE_SUM":15.4,"SCORE_AVG":7.7}
{"PATIENT_ID":2002,"LASTNAME":{"PATIENT_ID":2002,"LASTNAME":"Gosling"},"LAST_VISIT":"2020-10-08","SCORE_SUM":6.8,"SCORE_AVG":3.4}
{"PATIENT_ID":1001,"LASTNAME":{"PATIENT_ID":1001,"LASTNAME":"Hopper"},"LAST_VISIT":"2020-11-19","SCORE_SUM":17.4,"SCORE_AVG":5.8}
{"PATIENT_ID":3003,"LASTNAME":{"PATIENT_ID":3003,"LASTNAME":"Kemeny"},"LAST_VISIT":"2020-11-12","SCORE_SUM":null,"SCORE_AVG":null}
{"PATIENT_ID":4004,"LASTNAME":{"PATIENT_ID":4004,"LASTNAME":"Wirth"},"LAST_VISIT":"2020-11-05","SCORE_SUM":15.4,"SCORE_AVG":7.7}
{"PATIENT_ID":1001,"LASTNAME":{"PATIENT_ID":1001,"LASTNAME":"Hopper"},"LAST_VISIT":"2020-11-19","SCORE_SUM":17.4,"SCORE_AVG":5.8}
</pre>
 
=={{header|Julia}}==
{{trans|Python}}
<syntaxhighlight lang="julia">using CSV, DataFrames, Statistics
 
# load data from csv files
#df_patients = CSV.read("patients.csv", DataFrame)
#df_visits = CSV.read("visits.csv", DataFrame)
 
# create DataFrames from text that is hard coded, so use IOBuffer(String) as input
str_patients = IOBuffer("""PATIENT_ID,LASTNAME
1001,Hopper
4004,Wirth
3003,Kemeny
2002,Gosling
5005,Kurtz
""")
df_patients = CSV.read(str_patients, DataFrame)
str_visits = IOBuffer("""PATIENT_ID,VISIT_DATE,SCORE
2002,2020-09-10,6.8
1001,2020-09-17,5.5
4004,2020-09-24,8.4
2002,2020-10-08,
1001,,6.6
3003,2020-11-12,
4004,2020-11-05,7.0
1001,2020-11-19,5.3
""")
df_visits = CSV.read(str_visits, DataFrame)
 
# merge on PATIENT_ID, using an :outer join or we lose Kurtz, who has no data, sort by ID
df_merge = sort(join(df_patients, df_visits, on="PATIENT_ID", kind=:outer), (:PATIENT_ID,))
 
fnonmissing(a, f) = isempty(a) ? [] : isempty(skipmissing(a)) ? a[1] : f(skipmissing(a))
 
# group by patient id / last name and then aggregate to get latest visit and mean score
df_result = by(df_merge, [:PATIENT_ID, :LASTNAME]) do df
DataFrame(LATEST_VISIT = fnonmissing(df[:VISIT_DATE], maximum),
SUM_SCORE = fnonmissing(df[:SCORE], sum),
MEAN_SCORE = fnonmissing(df[:SCORE], mean))
end
println(df_result)
</syntaxhighlight>{{out}}
<pre>
5×5 DataFrame
│ Row │ PATIENT_ID │ LASTNAME │ LATEST_VISIT │ SUM_SCORE │ MEAN_SCORE │
│ │ Int64? │ String? │ Dates.Date? │ Float64? │ Float64? │
├─────┼────────────┼──────────┼──────────────┼───────────┼────────────┤
│ 1 │ 1001 │ Hopper │ 2020-11-19 │ 17.4 │ 5.8 │
│ 2 │ 2002 │ Gosling │ 2020-10-08 │ 6.8 │ 6.8 │
│ 3 │ 3003 │ Kemeny │ 2020-11-12 │ missing │ missing │
│ 4 │ 4004 │ Wirth │ 2020-11-05 │ 15.4 │ 7.7 │
│ 5 │ 5005 │ Kurtz │ missing │ missing │ missing │
</pre>
 
=={{header|Mathematica}}/{{header|Wolfram Language}}==
<syntaxhighlight lang="mathematica">a = ImportString["PATIENT_ID,LASTNAME
1001,Hopper
4004,Wirth
3003,Kemeny
2002,Gosling
5005,Kurtz", "CSV"];
b = ImportString["PATIENT_ID,VISIT_DATE,SCORE
2002,2020-09-10,6.8
1001,2020-09-17,5.5
4004,2020-09-24,8.4
2002,2020-10-08,
1001,,6.6
3003,2020-11-12,
4004,2020-11-05,7.0
1001,2020-11-19,5.3", "CSV"];
a = <|a[[1, 1]] -> #1, a[[1, 2]] -> #2|> & @@@ Rest[a];
b = <|b[[1, 1]] -> #1, b[[1, 2]] -> If[#2 != "", DateObject[#2], Missing[]], b[[1, 3]] -> If[#3 =!= "", #3, Missing[]]|> & @@@ Rest[b];
j = JoinAcross[a, b, Key["PATIENT_ID"], "Outer"];
gr = GroupBy[j, #["PATIENT_ID"] &];
<|"PATIENT_ID" -> #[[1, "PATIENT_ID"]],
"LASTNAME" -> #[[1, "LASTNAME"]],
"VISIT_DATE" -> If[DeleteMissing[#[[All, "VISIT_DATE"]]] =!= {}, Max@DeleteMissing[#[[All, "VISIT_DATE"]]], Missing[]],
"SCORE_SUM" -> If[DeleteMissing@#[[All, "SCORE"]] =!= {}, Total@DeleteMissing@#[[All, "SCORE"]], Missing[]],
"SCORE_AVG" -> If[DeleteMissing@#[[All, "SCORE"]] =!= {}, Mean@DeleteMissing@#[[All, "SCORE"]], Missing[]]|> & /@
gr // Dataset</syntaxhighlight>
{{out}}
<pre> PATIENT_ID LASTNAME VISIT_DATE SCORE_SUM SCORE_AVG
2002 2002 Gosling Thu 8 Oct 2020 6.8 6.8
1001 1001 Hopper Thu 19 Nov 2020 17.4 5.8
4004 4004 Wirth Thu 5 Nov 2020 15.4 7.7
3003 3003 Kemeny Thu 12 Nov 2020 \[LongDash] \[LongDash]
5005 5005 Kurtz \[LongDash] \[LongDash] \[LongDash]</pre>
=={{header|Mercury}}==
<syntaxhighlight lang="prolog">:- module rosetta.
 
:- interface.
:- import_module io.
:- pred main(io::di, io::uo) is det.
 
:- implementation.
:- import_module list, int, float, string, maybe, solutions.
 
:- pred patient(int::out, string::out) is multi.
patient(1001, "Hopper").
patient(4004, "Wirth").
patient(3003, "Kemeny").
patient(2002, "Gosling").
patient(5005, "Kurtz").
 
:- func nan = float.
nan = det_to_float("NaN").
 
:- type maybe_date ---> date(year::int, month::int, day::int); no.
 
:- pred visit(int::out, maybe_date::out, float::out) is multi.
visit(2002, date(2020,09,10), 6.8).
visit(1001, date(2020,09,17), 5.5).
visit(4004, date(2020,09,24), 8.4).
visit(2002, date(2020,10,08), nan).
visit(1001, no, 6.6).
visit(3003, date(2020,11,12), nan).
visit(4004, date(2020,11,05), 7.0).
visit(1001, date(2020,11,19), 5.3).
 
%% Utilities
:- pred bag_aggr(pred(T)::(pred(out) is nondet), pred(T,U,U)::pred(in,in,out) is det,
U::in, U::out) is det.
:- pred bag_count(pred(T)::(pred(out) is nondet), int::out) is det.
:- pred bag_sum(pred(float)::(pred(out) is nondet), float::out) is det.
:- pred bag_avg(pred(float)::(pred(out) is nondet), float::out) is det.
:- pred bag_max(pred(T)::(pred(out) is nondet), T::in, T::out) is det.
:- pred bag_max_date(pred(maybe_date)::(pred(out) is nondet), maybe_date::out) is det.
bag_aggr(Predicate, Aggregator, Initial, Result) :-
promise_equivalent_solutions[Result] (
unsorted_aggregate(Predicate, Aggregator, Initial, Result)).
bag_count(Predicate, Count) :-
bag_aggr(Predicate, (pred(_X::in,Y::in,Z::out) is det :- Z = Y+1), 0, Count).
bag_sum(Predicate, Sum) :-
bag_aggr(Predicate, (pred(X::in,Y::in,Z::out) is det :- Z = X+Y), 0.0, Sum).
bag_avg(Predicate, Avg) :-
bag_count(Predicate, N),
bag_sum(Predicate, Sum),
(if N = 0 then Avg = nan else Avg = Sum/float(N)).
bag_max(Predicate, Initial, Max) :-
bag_aggr(Predicate,
(pred(X::in,Y::in,Z::out) is det :-
compare(R,X,Y),
(if R = (>) then Z = X else Z = Y)),
Initial, Max).
bag_max_date(Predicate, MaxDate) :-
bag_max(Predicate, date(0,0,0), MaxDate1),
(if MaxDate1 = date(0,0,0) then MaxDate = no else MaxDate = MaxDate1).
 
main(!IO) :-
print_line("{Id, Lastname, SumScores, AvgScores, MaxDate}:", !IO),
aggregate((pred({Id,Lastname,Sum,Avg,MaxDate}::out) is nondet :-
patient(Id,Lastname),
Scores = (pred(Score::out) is nondet :- visit(Id,_,Score), \+is_nan(Score)),
bag_avg(Scores, Avg),
bag_sum(Scores, Sum),
Dates = (pred(Date::out) is nondet :- visit(Id,Date,_), Date\=no),
bag_max_date(Dates, MaxDate)),
print_line,
!IO).</syntaxhighlight>
{{out}}
<pre>{Id, Lastname, SumScores, AvgScores, MaxDate}:
{1001, "Hopper", 17.4, 5.8, date(2020, 11, 19)}
{2002, "Gosling", 6.8, 6.8, date(2020, 10, 8)}
{3003, "Kemeny", 0.0, nan, date(2020, 11, 12)}
{4004, "Wirth", 15.4, 7.7, date(2020, 11, 5)}
{5005, "Kurtz", 0.0, nan, no}</pre>
 
=={{header|Nim}}==
===CSV files and tables===
<syntaxhighlight lang="nim">import algorithm, parsecsv, strformat, strutils, tables
 
const NoValue = -1.0
 
type
 
Names = OrderedTable[Positive, string] # Mapping id -> last name.
 
Visit = tuple[date: string; score: float]
Visits = Table[Positive, seq[Visit]] # Mapping id -> list of visits.
 
 
proc readNames(path: string): Names =
## Read the records (id, lastname) from the CSV file and fill a Names table.
var parser: CsvParser
parser.open(path)
parser.readHeaderRow()
while parser.readRow():
let id = parser.row[0].parseInt
let name = parser.row[1]
result[id] = name
 
proc readVisits(path: string): Visits =
## Read the records (id, date, score) from the CSV file and fill a Visits table.
var parser: CsvParser
parser.open(path)
parser.readHeaderRow()
while parser.readRow():
let id = parser.row[0].parseInt
let date = parser.row[1]
let score = if parser.row[2].len == 0: NoValue else: parser.row[2].parseFloat
result.mgetOrPut(id, @[]).add (date, score)
 
 
var
names = readNames("patients1.csv")
visits = readVisits("patients2.csv")
 
names.sort(system.cmp)
 
echo "| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |"
for (id, name) in names.pairs:
let visitList = visits.getOrDefault(id).sorted()
let lastVisit = if visitList.len == 0: "" else: visitList[^1].date
var sum = 0.0
var count = 0
for visit in visitList:
if visit.score != NoValue:
sum += visit.score
inc count
let scoreSum = if count == 0: "" else: &"{sum:>4.1f}"
let scoreAvg = if count == 0: "" else: &"{sum / count.toFloat: >4.2f}"
echo &"| {id:^10} | {name:^10} | {lastVisit:^10} | {scoreSum:>7} | {scoreAvg:>6} |"</syntaxhighlight>
 
{{out}}
<pre>| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
| 1001 | Hopper | 2020-11-19 | 17.4 | 5.80 |
| 2002 | Gosling | 2020-10-08 | 6.8 | 6.80 |
| 3003 | Kemeny | 2020-11-12 | | |
| 4004 | Wirth | 2020-11-05 | 15.4 | 7.70 |
| 5005 | Kurtz | | | |</pre>
 
===CSV files and SQLite3===
{{trans|Python}}
We use the high level standard library "db_sqlite" rather than the low level one "sqlite3".
<syntaxhighlight lang="nim">import parseCsv, db_sqlite, sequtils, strutils
 
const FNames = ["patients1.csv", "patients2.csv"]
 
proc createTableHeaders(conn: DbConn) =
conn.exec(sql"CREATE TABLE names(PATIENT_ID INT, LASTNAME TEXT);")
conn.exec(sql"CREATE TABLE visits(PATIENT_ID INT, VISIT_DATE DATE, SCORE NUMERIC(4,1));")
 
 
proc fillTables(dbc: DbConn) =
for idx, fname in FNames:
dbc.exec(sql"BEGIN")
var parser: CsvParser
parser.open(fname)
parser.readHeaderRow()
while parser.readRow():
if idx == 0: # "names" table.
dbc.exec(sql"INSERT INTO names VALUES (?, ?);", parser.row)
else: # "visits" table
dbc.exec(sql"INSERT INTO visits VALUES (?, ?, ?);", parser.row)
dbc.exec(sql"COMMIT")
 
 
proc joinTablesAndGroup(dbc: DbConn): seq[Row] =
dbc.exec(sql"""CREATE TABLE answer AS
SELECT
names.PATIENT_ID,
names.LASTNAME,
MAX(VISIT_DATE) AS LAST_VISIT,
SUM(SCORE) AS SCORE_SUM,
CAST(AVG(SCORE) AS DECIMAL(10,2)) AS SCORE_AVG
FROM
names
LEFT JOIN visits
ON visits.PATIENT_ID = names.PATIENT_ID
GROUP BY
names.PATIENT_ID,
names.LASTNAME
ORDER BY
names.PATIENT_ID;""")
result = dbc.getAllRows(sql"SELECT * FROM ANSWER")
 
# Build the database and execute the request to get the result.
let dbc = open(":memory:", "", "", "")
dbc.createTableHeaders()
dbc.fillTables()
let result = dbc.joinTablesAndGroup()
dbc.close()
 
# Print the result.
echo "| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |"
for row in result:
echo "| " & row.mapit(it.center(10)).join(" | ") & '|'</syntaxhighlight>
 
{{out}}
<pre>| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
| 1001 | Hopper | 2020-11-19 | 17.4 | 5.8 |
| 2002 | Gosling | 2020-10-08 | 6.8 | 3.4 |
| 3003 | Kemeny | 2020-11-12 | 0.0 | 0 |
| 4004 | Wirth | 2020-11-05 | 15.4 | 7.7 |
| 5005 | Kurtz | | | |</pre>
 
=={{header|Phix}}==
<!--<syntaxhighlight lang="phix">(phixonline)-->
<span style="color: #008080;">constant</span> <span style="color: #000000;">patients_txt</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">split</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"""
PATIENT_ID,LASTNAME
1001,Hopper
4004,Wirth
3003,Kemeny
2002,Gosling
5005,Kurtz"""</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'\n'</span><span style="color: #0000FF;">),</span>
<span style="color: #000000;">visits_txt</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">split</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"""
PATIENT_ID,VISIT_DATE,SCORE
2002,2020-09-10,6.8
1001,2020-09-17,5.5
4004,2020-09-24,8.4
2002,2020-10-08,
1001,,6.6
3003,2020-11-12,
4004,2020-11-05,7.0
1001,2020-11-19,5.3"""</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'\n'</span><span style="color: #0000FF;">)</span>
<span style="color: #000080;font-style:italic;">--or, assuming these files contain exactly the same actual raw text,
-- and the use of GT_LF_STRIPPED is instead of above split('\n'):
--constant patients_txt = get_text("patients.csv",GT_LF_STRIPPED),
-- visits_txt = get_text("visits.csv",GT_LF_STRIPPED)</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">sap</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">return</span> <span style="color: #7060A8;">split</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">,</span><span style="color: #008000;">','</span><span style="color: #0000FF;">,</span><span style="color: #004600;">false</span><span style="color: #0000FF;">)&{</span><span style="color: #008000;">""</span><span style="color: #0000FF;">,</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #000000;">0</span><span style="color: #0000FF;">}</span> <span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">svp</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">return</span> <span style="color: #7060A8;">split</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">,</span><span style="color: #008000;">','</span><span style="color: #0000FF;">,</span><span style="color: #004600;">false</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">patient_data</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sort</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">apply</span><span style="color: #0000FF;">(</span><span style="color: #000000;">patients_txt</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">..$],</span><span style="color: #000000;">sap</span><span style="color: #0000FF;">)),</span>
<span style="color: #000000;">visit_data</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sort_columns</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">apply</span><span style="color: #0000FF;">(</span><span style="color: #000000;">visits_txt</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">..$],</span><span style="color: #000000;">svp</span><span style="color: #0000FF;">),{</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,-</span><span style="color: #000000;">2</span><span style="color: #0000FF;">})</span>
<span style="color: #000000;">visit_data</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">visit_data</span><span style="color: #0000FF;">,{</span><span style="color: #008000;">""</span><span style="color: #0000FF;">,</span><span style="color: #008000;">""</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"0"</span><span style="color: #0000FF;">})</span> <span style="color: #000080;font-style:italic;">-- (add a sentinel)</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">last_id</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">id</span><span style="color: #0000FF;">,</span><span style="color: #000000;">name</span><span style="color: #0000FF;">,</span><span style="color: #000000;">dt</span><span style="color: #0000FF;">,</span><span style="color: #000000;">scstr</span><span style="color: #0000FF;">,</span><span style="color: #000000;">avstr</span>
<span style="color: #004080;">atom</span> <span style="color: #000000;">score</span><span style="color: #0000FF;">,</span><span style="color: #000000;">score_total</span><span style="color: #0000FF;">,</span><span style="color: #000000;">average</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">visit_count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">pdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">visit_data</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #0000FF;">{</span><span style="color: #7060A8;">id</span><span style="color: #0000FF;">,</span><span style="color: #000000;">dt</span><span style="color: #0000FF;">,</span><span style="color: #000000;">scstr</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">visit_data</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
<span style="color: #000000;">score</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">scstr</span><span style="color: #0000FF;">=</span><span style="color: #008000;">""</span><span style="color: #0000FF;">?</span><span style="color: #000000;">0</span><span style="color: #0000FF;">:</span><span style="color: #7060A8;">scanf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">scstr</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%f"</span><span style="color: #0000FF;">)[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">1</span><span style="color: #0000FF;">])</span>
<span style="color: #008080;">if</span> <span style="color: #7060A8;">id</span><span style="color: #0000FF;">!=</span><span style="color: #000000;">last_id</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">visit_count</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">average</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">score_total</span><span style="color: #0000FF;">/</span><span style="color: #000000;">visit_count</span>
<span style="color: #000000;">patient_data</span><span style="color: #0000FF;">[</span><span style="color: #000000;">pdx</span><span style="color: #0000FF;">][</span><span style="color: #000000;">4</span><span style="color: #0000FF;">..</span><span style="color: #000000;">5</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">score_total</span><span style="color: #0000FF;">,</span><span style="color: #000000;">average</span><span style="color: #0000FF;">}</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">visit_data</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> <span style="color: #000080;font-style:italic;">-- (sentinel)</span>
<span style="color: #000000;">score_total</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">score</span>
<span style="color: #000000;">visit_count</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">score</span><span style="color: #0000FF;">!=</span><span style="color: #000000;">0</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">while</span> <span style="color: #7060A8;">id</span><span style="color: #0000FF;">!=</span><span style="color: #000000;">patient_data</span><span style="color: #0000FF;">[</span><span style="color: #000000;">pdx</span><span style="color: #0000FF;">][</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">do</span> <span style="color: #000000;">pdx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> <span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #000000;">patient_data</span><span style="color: #0000FF;">[</span><span style="color: #000000;">pdx</span><span style="color: #0000FF;">][</span><span style="color: #000000;">3</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">dt</span>
<span style="color: #000000;">last_id</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">id</span>
<span style="color: #008080;">elsif</span> <span style="color: #000000;">score</span><span style="color: #0000FF;">!=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">score_total</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">score</span>
<span style="color: #000000;">visit_count</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |\n"</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">patient_data</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #0000FF;">{</span><span style="color: #7060A8;">id</span><span style="color: #0000FF;">,</span><span style="color: #000000;">name</span><span style="color: #0000FF;">,</span><span style="color: #000000;">dt</span><span style="color: #0000FF;">,</span><span style="color: #000000;">score</span><span style="color: #0000FF;">,</span><span style="color: #000000;">average</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">patient_data</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
<span style="color: #000000;">scstr</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">score</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span><span style="color: #0000FF;">?</span><span style="color: #008000;">""</span><span style="color: #0000FF;">:</span><span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%4.1f"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">score</span><span style="color: #0000FF;">))</span>
<span style="color: #000000;">avstr</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">average</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span><span style="color: #0000FF;">?</span><span style="color: #008000;">""</span><span style="color: #0000FF;">:</span><span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%4.2f"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">average</span><span style="color: #0000FF;">))</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"| %-10s | %-7s | %10s | %-9s | %-9s |\n"</span><span style="color: #0000FF;">,</span>
<span style="color: #0000FF;">{</span><span style="color: #7060A8;">id</span><span style="color: #0000FF;">,</span><span style="color: #000000;">name</span><span style="color: #0000FF;">,</span><span style="color: #000000;">dt</span><span style="color: #0000FF;">,</span><span style="color: #000000;">scstr</span><span style="color: #0000FF;">,</span><span style="color: #000000;">avstr</span><span style="color: #0000FF;">})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<!--</syntaxhighlight>-->
{{out}}
<pre>
| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
| 1001 | Hopper | 2020-11-19 | 17.4 | 5.80 |
| 2002 | Gosling | 2020-10-08 | 6.8 | 6.80 |
| 3003 | Kemeny | 2020-11-12 | | |
| 4004 | Wirth | 2020-11-05 | 15.4 | 7.70 |
| 5005 | Kurtz | | | |
</pre>
 
=={{header|Perl}}==
Weird requirement: "Use the most common libraries only when built-in functionality is not sufficient."
Not even a "use strict;" :)
<syntaxhighlight lang="perl">#!/usr/bin/perl
my $fmt = '| %-11s' x 5 . "|\n";
printf $fmt, qw( PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG);
my ($names, $visits) = do { local $/; split /^\n/m, <DATA> };
my %score;
for ( $visits =~ /^\d.*/gm )
{
my ($id, undef, $score) = split /,/;
$score{$id} //= ['', ''];
$score and $score{$id}[0]++, $score{$id}[1] += $score;
}
for ( sort $names =~ /^\d.*/gm )
{
my ($id, $name) = split /,/;
printf $fmt, $id, $name, ( sort $visits =~ /^$id,(.*?),/gm, '' )[-1],
$score{$id}[0]
? ( $score{$id}[1], $score{$id}[1] / $score{$id}[0])
: ('', '');
}
 
__DATA__
PATIENT_ID,LASTNAME
1001,Hopper
4004,Wirth
3003,Kemeny
2002,Gosling
5005,Kurtz
 
PATIENT_ID,VISIT_DATE,SCORE
2002,2020-09-10,6.8
1001,2020-09-17,5.5
4004,2020-09-24,8.4
2002,2020-10-08,
1001,,6.6
3003,2020-11-12,
4004,2020-11-05,7.0
1001,2020-11-19,5.3</syntaxhighlight>
{{out}}
<pre>
| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
| 1001 | Hopper | 2020-11-19 | 17.4 | 5.8 |
| 2002 | Gosling | 2020-10-08 | 6.8 | 6.8 |
| 3003 | Kemeny | 2020-11-12 | | |
| 4004 | Wirth | 2020-11-05 | 15.4 | 7.7 |
| 5005 | Kurtz | | | |
</pre>
=={{header|Prolog}}==
Implemented using SWI Prolog:
<syntaxhighlight lang="prolog">patient(1001,'Hopper').
patient(4004,'Wirth').
patient(3003,'Kemeny').
patient(2002,'Gosling').
patient(5005,'Kurtz').
visit(2002,'2020-09-10',6.8).
visit(1001,'2020-09-17',5.5).
visit(4004,'2020-09-24',8.4).
visit(2002,'2020-10-08',nan).
visit(1001,'',6.6).
visit(3003,'2020-11-12',nan).
visit(4004,'2020-11-05',7.0).
visit(1001,'2020-11-19',5.3).
 
summaryDates(Id, Lastname, LastDate) :-
aggregate(max(Ts),
Score^Date^(visit(Id, Date, Score), Date \= '', parse_time(Date, iso_8601, Ts)),
MaxTs),
format_time(atom(LastDate), '%Y-%m-%d', MaxTs),
patient(Id,Lastname).
 
summaryScores(Id, Lastname, Sum, Mean) :-
aggregate(r(sum(Score),count), Date^(visit(Id, Date, Score), Score \= nan), r(Sum,Count)),
patient(Id,Lastname),
Mean is Sum/Count.
 
test :-
summaryDates(Id, Lastname, LastDate),
writeln(summaryDates(Id, Lastname, LastDate)),
fail.
 
test :-
summaryScores(Id, Lastname, ScoreSum, ScoreMean),
writeln(summaryScores(Id, Lastname, ScoreSum, ScoreMean)),
fail.</syntaxhighlight>
{{output}}
<pre>summaryDates(1001,Hopper,2020-11-19)
summaryDates(2002,Gosling,2020-10-08)
summaryDates(3003,Kemeny,2020-11-12)
summaryDates(4004,Wirth,2020-11-05)
summaryScores(1001,Hopper,17.4,5.8)
summaryScores(2002,Gosling,6.8,6.8)
summaryScores(4004,Wirth,15.4,7.7)
false.</pre>
Implemented using XSB Prolog (which allows for user-defined aggregates):
<syntaxhighlight lang="prolog">:- import bagMax/2, bagCount/2, bagSum/2, bagReduce/4 from aggregs.
:- import julian_date/7, date_string/3 from iso8601.
:- import load_csv/2, add_cvt_type_hook/2 from proc_files.
 
?- add_cvt_type_hook(date,date_converter(_,_)).
 
date_converter(Atom,Date) :- date_string('YYYY-MM-DD',Date,Atom).
 
:- load_csv('visit.csv',visit(integer,date,float)).
:- load_csv('patient.csv',patient(integer,atom)).
 
is_nan(Number) :- X is Number, X =\= Number.
 
summaryDates(Id, Lastname, LastDate) :-
bagMax(date_number(Id), LastDateNumber),
patient(Id,Lastname),
julian_date(LastDateNumber, Y, M, D, _, _, _),
date_converter(LastDate, date(Y,M,D)).
 
summaryScores(Id, Lastname, Sum, Mean) :-
bagSum(scores(Id), Sum),
bagCount(scores(Id), Count),
Mean is Sum/Count,
patient(Id,Lastname).
 
test :-
summaryDates(Id,Lastname,LastDate),
writeln(summaryDates(Id,Lastname,LastDate)), fail.
 
test :-
summaryScores(Id, Lastname, ScoreSum, ScoreMean),
writeln(summaryScores(Id, Lastname, ScoreSum, ScoreMean)), fail.
 
/* Put hilog declarations together */
 
date_number(Id)(Number) :-
visit(Id, date(Y,M,D), _),
julian_date(Number, Y, M, D, _, _, _).
scores(Id)(Score) :-
visit(Id, _, Score),
\+is_nan(Score).
 
:- hilog maximum.
maximum(X,Y,Z) :- X @> Y -> Z=X ; Z=Y.
:- hilog sum.
sum(X,Y,Z) :- Z is X+Y.
:- hilog successor.
successor(X,_Y,Z) :- Z is X+1.</syntaxhighlight>
 
=={{header|PureBasic}}==
<syntaxhighlight lang="purebasic">Structure Person
Name$
EndStructure
 
Structure Visits
Datum$
Score$
EndStructure
 
Structure Merge
Patient.Person
List PVisit.Visits()
EndStructure
 
NewMap P.Merge()
NewList ID$()
 
If ReadFile(1,"./Data/patients.csv")=0 : End 1 : EndIf
header=1
While Not Eof(1)
buf1$=ReadString(1)
If header=1 : header=0 : Continue : EndIf
bufId$=StringField(buf1$,1,",")
P(bufId$)\Patient\Name$=StringField(buf1$,2,",")
AddElement(ID$()) : ID$()=bufId$
Wend
CloseFile(1)
 
If ReadFile(2,"./Data/visits.csv")=0 : End 2 : EndIf
header=1
While Not Eof(2)
buf1$=ReadString(2)
If header=1 : header=0 : Continue : EndIf
bufId$=StringField(buf1$,1,",")
AddElement(P(bufId$)\PVisit())
P(bufId$)\PVisit()\Datum$=StringField(buf1$,2,",")
P(bufId$)\PVisit()\Score$=StringField(buf1$,3,",")
Wend
CloseFile(2)
 
If OpenConsole()=0 : End 3 : EndIf
SortList(ID$(),#PB_Sort_Ascending)
PrintN("| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |")
ForEach ID$()
Print("| "+LSet(ID$(),11))
Print("| "+LSet(P(ID$())\Patient\Name$,9)+"|")
SortStructuredList(P(ID$())\PVisit(),#PB_Sort_Ascending,OffsetOf(Visits\Datum$),TypeOf(Visits\Datum$))
ForEach P(ID$())\PVisit()
scs.f+ValF(p(ID$())\PVisit()\Score$) : c+Bool(ValF(p(ID$())\PVisit()\Score$))
Next
If LastElement(P(ID$())\PVisit())
sca.f=scs/c
Print(" "+LSet(P(ID$())\PVisit()\Datum$,10)+" |")
Print(RSet(StrF(scs,1),10)+" |")
If Not IsNAN(sca) : Print(RSet(StrF(sca,2),10)+" |") : Else : Print(Space(11)+"|") : EndIf
Else
Print(Space(12)+"|"+Space(11)+"|"+Space(11)+"|")
EndIf
PrintN("") : scs=0 : c=0
Next
Input()</syntaxhighlight>
{{out}}
<pre>| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
| 1001 | Hopper | 2020-11-19 | 17.4 | 5.80 |
| 2002 | Gosling | 2020-10-08 | 6.8 | 6.80 |
| 3003 | Kemeny | 2020-11-12 | 0.0 | |
| 4004 | Wirth | 2020-11-05 | 15.4 | 7.70 |
| 5005 | Kurtz | | | |</pre>
 
=={{header|Python}}==
 
===Python: Using pandas library===
<syntaxhighlight lang="python"># to install pandas library go to cmd prompt and type:
# cd %USERPROFILE%\AppData\Local\Programs\Python\Python38-32\Scripts\
# pip install pandas
import pandas as pd
 
# load data from csv files
df_patients = pd.read_csv (r'patients.csv', sep = ",", decimal=".")
df_visits = pd.read_csv (r'visits.csv', sep = ",", decimal=".")
 
''' # load data hard coded, create data frames
import io
str_patients = """PATIENT_ID,LASTNAME
1001,Hopper
4004,Wirth
3003,Kemeny
2002,Gosling
5005,Kurtz
"""
df_patients = pd.read_csv(io.StringIO(str_patients), sep = ",", decimal=".")
str_visits = """PATIENT_ID,VISIT_DATE,SCORE
2002,2020-09-10,6.8
1001,2020-09-17,5.5
4004,2020-09-24,8.4
2002,2020-10-08,
1001,,6.6
3003,2020-11-12,
4004,2020-11-05,7.0
1001,2020-11-19,5.3
"""
df_visits = pd.read_csv(io.StringIO(str_visits), sep = ",", decimal=".")
'''
 
# typecast from string to datetime so .agg can 'max' it
df_visits['VISIT_DATE'] = pd.to_datetime(df_visits['VISIT_DATE'])
 
# merge on PATIENT_ID
df_merge = df_patients.merge(df_visits, on='PATIENT_ID', how='left')
 
# groupby is an intermediate object
df_group = df_merge.groupby(['PATIENT_ID','LASTNAME'], as_index=False)
 
# note: you can use 'sum' instead of the lambda function but that returns NaN as 0 (zero)
df_result = df_group.agg({'VISIT_DATE': 'max', 'SCORE': [lambda x: x.sum(min_count=1),'mean']})
 
print(df_result)</syntaxhighlight>
<pre>
PATIENT_ID LASTNAME LAST_VISIT SCORE
max <lambda_0> mean
0 1001 Hopper 2020-11-19 17.4 5.8
1 2002 Gosling 2020-10-08 6.8 6.8
2 3003 Kemeny 2020-11-12 NaN NaN
3 4004 Wirth 2020-11-05 15.4 7.7
4 5005 Kurtz NaT NaN NaN
</pre>
 
===Python: Stdlib csv only===
Using only standard libraries and input from csv files.
 
<syntaxhighlight lang="python">import csv
 
fnames = 'patients.csv patients_visits.csv'.split()
 
def csv2list(fname):
with open(fname) as f:
rows = list(csv.reader(f))
return rows
 
patients, visits = data = [csv2list(fname) for fname in fnames]
result = [record.copy() for record in patients]
result[1:] = sorted(result[1:])
#%%
result[0].append('LAST_VISIT')
last = {p: vis for p, vis, *score in visits[1:]}
for record in result[1:]:
p = record[0]
record.append(last.get(p, ''))
#%%
result[0] += ['SCORE_SUM', 'SCORE_AVG']
n = {p: 0 for p, *_ in patients[1:]}
tot = n.copy()
for record in visits[1:]:
p, _, score = record
if score:
n[p] += 1
tot[p] += float(score)
for record in result[1:]:
p = record[0]
if n[p]:
record += [f"{tot[p]:5.1f}", f"{tot[p] / n[p]:5.2f}"]
else:
record += ['', '']
#%%
for record in result:
print(f"| {' | '.join(f'{r:^10}' for r in record)} |")</syntaxhighlight>
 
{{out}}
<pre>| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
| 1001 | Hopper | 2020-11-19 | 17.4 | 5.80 |
| 2002 | Gosling | 2020-10-08 | 6.8 | 6.80 |
| 3003 | Kemeny | 2020-11-12 | | |
| 4004 | Wirth | 2020-11-05 | 15.4 | 7.70 |
| 5005 | Kurtz | | | |</pre>
 
 
===Python: Stdlib sqlite3 and csv only===
Using the csv module just to parse inputs; and the sqlite3 module, (which which is also a standard library that comes with the base Python install), to calculate the output.<br>
(The SQL SELECT statement is modelled on that of the SQL entry elsewhere on this page).
 
<syntaxhighlight lang="python">import sqlite3
import csv
 
 
fnames = 'patients.csv patients_visits.csv'.split()
conn = sqlite3.connect(":memory:")
#%%
def create_table_headers(conn):
curs = conn.cursor()
curs.execute('''
CREATE TABLE patients(PATIENT_ID INT, LASTNAME TEXT);
''')
curs.execute('''
CREATE TABLE patients_visits(PATIENT_ID INT, VISIT_DATE DATE, SCORE NUMERIC(4,1));
''')
conn.commit()
 
def fill_tables(conn, fnames):
curs = conn.cursor()
for fname in fnames:
with open(fname) as f:
tablename = fname.replace('.csv', '')
#
csvdata = csv.reader(f)
header = next(csvdata)
fields = ','.join('?' for _ in header)
for row in csvdata:
row = [(None if r == '' else r) for r in row]
curs.execute(f"INSERT INTO {tablename} VALUES ({fields});", row)
conn.commit()
 
def join_tables_and_group(conn):
curs = conn.cursor()
curs.execute('''
CREATE TABLE answer AS
SELECT
patients.PATIENT_ID,
patients.LASTNAME,
MAX(VISIT_DATE) AS LAST_VISIT,
SUM(SCORE) AS SCORE_SUM,
CAST(AVG(SCORE) AS DECIMAL(10,2)) AS SCORE_AVG
FROM
patients
LEFT JOIN patients_visits
ON patients_visits.PATIENT_ID = patients.PATIENT_ID
GROUP BY
patients.PATIENT_ID,
patients.LASTNAME
ORDER BY
patients.PATIENT_ID;
''')
curs.execute('''
SELECT * FROM answer;
''')
conn.commit()
rows = list(curs.fetchall())
headers = tuple(d[0] for d in curs.description)
return [headers] + rows
create_table_headers(conn)
fill_tables(conn, fnames)
result = join_tables_and_group(conn)
for record in result:
print(f"| {' | '.join(f'{str(r):^10}' for r in record)} |")</syntaxhighlight>
 
{{out}}
<pre>| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
| 1001 | Hopper | 2020-11-19 | 17.4 | 5.8 |
| 2002 | Gosling | 2020-10-08 | 6.8 | 6.8 |
| 3003 | Kemeny | 2020-11-12 | None | None |
| 4004 | Wirth | 2020-11-05 | 15.4 | 7.7 |
| 5005 | Kurtz | None | None | None |</pre>
 
=={{header|R}}==
<syntaxhighlight lang="r"># load data from csv files
# setwd("C:\Temp\csv\")
# df_patient <- read.csv(file="patients.csv", header = TRUE, sep = ",")
# df_visitdf_visits <- read.csv(file="visitvisits.csv", header = TRUE, sep = ",", dec = ".", colClasses=c("character","character","numeric"))
 
# load datasetsdata hard coded, create data frames
df_patient <- read.table(text = "
PATIENT_ID,LASTNAME
Line 56 ⟶ 2,002:
2002,Gosling
5005,Kurtz
", header = TRUE, sep = ",") # character fields so no need for extra parameters colClasses etc.
 
df_visitdf_visits <- read.table(text = "
PATIENT_ID,VISIT_DATE,SCORE
2002,2020-09-10,6.8
1001,2020-09-17,5.5
4004,2020-09-24,8.4
2002,2020-10-08,NULL
1001,NULL,6.6
3003,2020-11-12,NULL
4004,2020-11-05,7.0
1001,2020-11-19,5.3
", header = TRUE, dec = ".", sep = ",", colClasses=c("character","character","numeric"))
 
# aggregate visit date and scores
# result
df_result df_agg <- data.frame(
cbind(
PATIENT_ID = names(tapply(df_visitdf_visits$VISIT_DATE, list(df_visitdf_visits$PATIENT_ID), max, na.rm=TRUE)),
last_visit = tapply(df_visitdf_visits$VISIT_DATE, list(df_visitdf_visits$PATIENT_ID), max, na.rm=TRUE),
score_avgscore_sum = tapply(df_visitdf_visits$SCORE, list(df_visitdf_visits$PATIENT_ID), meansum, na.rm=TRUE),
score_sumscore_avg = tapply(df_visitdf_visits$SCORE, list(df_visitdf_visits$PATIENT_ID), summean, na.rm=TRUE)
)
)
 
# merge patients and aggregate dataset
# combine two datasets in r
# all.x = all the non matching cases of df_patient are appended to the result as well (i.e. 'left join')
df_result2df_result <- merge(df_patient, df_resultdf_agg, by = 'PATIENT_ID', all.x = TRUE)</lang>
 
print(df_result)</syntaxhighlight>
{{out}}
<pre>
PATIENT_ID LASTNAME last_visit score_sum score_avg
1 1001 Hopper 2020-11-19 17.4 5.8
2 2002 Gosling 2020-10-08 6.8 6.8
3 3003 Kemeny 2020-11-12 0 NaN
4 4004 Wirth 2020-11-05 15.4 7.7
5 5005 Kurtz <NA> <NA> <NA>
</pre>
 
=={{header|Raku}}==
<syntaxhighlight lang="raku" line>my @names = map { ( <PATIENT_ID LASTNAME> Z=> .list ).hash },
( 1001, 'Hopper' ),
( 4004, 'Wirth' ),
( 3003, 'Kemeny' ),
( 2002, 'Gosling' ),
( 5005, 'Kurtz' ),
;
my @visits = map { ( <PATIENT_ID VISIT_DATE SCORE> Z=> .list ).hash },
( 2002, '2020-09-10', 6.8 ),
( 1001, '2020-09-17', 5.5 ),
( 4004, '2020-09-24', 8.4 ),
( 2002, '2020-10-08', Nil ),
( 1001, Nil , 6.6 ),
( 3003, '2020-11-12', Nil ),
( 4004, '2020-11-05', 7.0 ),
( 1001, '2020-11-19', 5.3 ),
;
 
my %v = @visits.classify: *.<PATIENT_ID>;
 
my @result = gather for @names -> %n {
my @p = %v{ %n.<PATIENT_ID> }<>;
 
my @dates = @p».<VISIT_DATE>.grep: *.defined;
my @scores = @p».< SCORE>.grep: *.defined;
 
take {
%n,
LAST_VISIT => ( @dates.max if @dates ),
SCORE_AVG => ( @scores.sum/@scores if @scores ),
SCORE_SUM => ( @scores.sum if @scores ),
};
}
 
my @out_field_names = <PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG>;
my @rows = @result.sort(*.<PATIENT_ID>).map(*.{@out_field_names});
say .map({$_ // ''}).fmt('%-10s', ' | ') for @out_field_names, |@rows;</syntaxhighlight>
{{out}}
<pre>
PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG
1001 | Hopper | 2020-11-19 | 17.4 | 5.8
2002 | Gosling | 2020-10-08 | 6.8 | 6.8
3003 | Kemeny | 2020-11-12 | |
4004 | Wirth | 2020-11-05 | 15.4 | 7.7
5005 | Kurtz | | |
</pre>
 
=={{header|REXX}}==
<syntaxhighlight lang="rexx">/* REXX */
patients='patients.csv'
l=linein(patients)
Parse Var l h1 ',' h2
n=0
idl=''
Do n=1 By 1 While lines(patients)>0
l=linein(patients)
Parse Var l id ',' lastname.id
idl=idl id
End
n=n-1 /* number of patients */
visits='visits.csv'
l=linein(visits) /* skip the header line of this file */
h3='LAST_VISIT'
h4='SCORE_SUM'
h5='SCORE_AVG'
date.=''
score.=0
Say '|' h1 '|' h2 '|' h3 '|' h4 '|' h5 '|'
Do While lines(visits)>0
l=linein(visits)
Parse Var l id ',' date ',' score
if date>date.id Then date.id=date
If score>'' Then Do
z=score.id.0+1
score.id.z=score
score.id.0=z
End
end
idl=wordsort(idl)
Do While idl<>''
Parse Var idl id idl
If date.id='' Then date.id=copies(' ',10)
ol='|' left(id,length(h1)) '|' left(lastname.id,length(h2)),
'|' left(date.id,length(h3))
If score.id.0=0 Then Do
ol=ol '|' left(' ',length(h4)) '|',
left(' ',length(h5)) '|'
score_sum=copies(' ',length(h4))
score_avg=copies(' ',length(h4))
End
Else Do
score_sum=0
Do j=1 To score.id.0
score_sum=score_sum+score.id.j
End
score_avg=score_sum/score.id.0
ol=ol '|' left(format(score_sum,2,1),length(h4)) '|',
left(format(score_avg,2,1),length(h5)) '|'
End
Say ol
End
Exit
 
wordsort: Procedure
/**********************************************************************
* Sort the list of words supplied as argument. Return the sorted list
**********************************************************************/
Parse Arg wl
wa.=''
wa.0=0
Do While wl<>''
Parse Var wl w wl
Do i=1 To wa.0
If wa.i>w Then Leave
End
If i<=wa.0 Then Do
Do j=wa.0 To i By -1
ii=j+1
wa.ii=wa.j
End
End
wa.i=w
wa.0=wa.0+1
End
swl=''
Do i=1 To wa.0
swl=swl wa.i
End
Return strip(swl)</syntaxhighlight>
{{out}}
<pre>| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
| 1001 | Hopper | 2020-11-19 | 17.4 | 5.8 |
| 2002 | Gosling | 2020-10-08 | 6.8 | 6.8 |
| 3003 | Kemeny | 2020-11-12 | | |
| 4004 | Wirth | 2020-11-05 | 15.4 | 7.7 |
| 5005 | Kurtz | | | |</pre>
=={{header|SAS}}==
<syntaxhighlight lang="sas"> %let datefmt=E8601DA10.;
data patient;
infile "patient.csv" dsd dlm=',';
attrib
id length=4
lastname length=$10;
input id lastname;
data visit;
infile "visit.csv" dsd dlm=',';
attrib
id length=4
date informat=&datefmt format=&datefmt
score length=8;
input id date score;
proc sql;
select * from
(select id, max(date) format=&datefmt as max_date, sum(score) as sum_score,
avg(score) as avg_score from visit group by id)
natural right join patient
order by id;</syntaxhighlight>
{{out}}
<pre>: id lastname max_date sum_score avg_score
: ------------------------------------------------------
: 1001 Hopper 2020-11-19 17.4 5.8
: 2002 Gosling 2020-10-08 6.8 6.8
: 3003 Kemeny 2020-11-12 . .
: 4004 Wirth 2020-11-05 15.4 7.7
: 5005 Kurtz . . .</pre>
 
=={{header|SPSS}}==
 
Note: the PATIENT_ID 5005 is missing from the result, because MATCH FILES cannot add rows for missing keys, i.e. AFAIK a "left join" is not possible is SPSS.
 
<syntaxhighlight lang="spss">* set working directory to location of .csv files
 
CD 'C:\Temp\csv\'.
 
* load patients csv data
 
GET DATA /TYPE=TXT
/FILE="patients.csv"
/ENCODING='UTF8'
/DELCASE=LINE
/DELIMITERS=","
/QUALIFIER='"'
/ARRANGEMENT=DELIMITED
/FIRSTCASE=2
/IMPORTCASE=ALL
/VARIABLES=
PATIENT_ID F5.0
LASTNAME A20
.
CACHE.
EXECUTE.
 
* sort cases is needed to match files
 
SORT CASES BY PATIENT_ID (A).
DATASET NAME Patients WINDOW=FRONT.
 
* load visits csv data
 
GET DATA /TYPE=TXT
/FILE="visit.csv"
/ENCODING='UTF8'
/DELCASE=LINE
/DELIMITERS=","
/QUALIFIER='"'
/ARRANGEMENT=DELIMITED
/FIRSTCASE=2
/IMPORTCASE=ALL
/VARIABLES=
PATIENT_ID F5.0
VISIT_DATE SDATE10
SCORE F4.1
.
CACHE.
EXECUTE.
 
* sort cases is needed, else match files will raise error "Files out of order"
 
SORT CASES BY PATIENT_ID (A) VISIT_DATE (A).
DATASET NAME Visits WINDOW=FRONT.
 
* load visits csv data
 
* merge datasets, one to many, FILE is the 'one', TABLE is 'many'
 
MATCH FILES TABLE = Patients / FILE = Visits
/BY PATIENT_ID.
EXECUTE.
 
* aggregate visit date and scores, group by and order (A)=ascending or (D)=descending
 
AGGREGATE OUTFILE *
/BREAK=PATIENT_ID(A)
/last_visit = MAX(VISIT_DATE)
/score_avg = MEAN(SCORE)
/score_sum = SUM(SCORE).</syntaxhighlight>
 
{{out}}
<pre>
| PATIENT_ID | LASTNAME | last_visit | score_sum | score_avg |
| 1001 | Hopper | 2020-11-19 | 17.4 | 5.8 |
| 2002 | Gosling | 2020-10-08 | 6.8 | 6.8 |
| 3003 | Kemeny | 2020-11-12 | . | . |
| 4004 | Wirth | 2020-11-05 | 15.4 | 7.7 |
</pre>
 
=={{header|SQL}}==
 
<syntaxhighlight lang="sql">-- drop tables
<lang SQL>
-- drop tables
DROP TABLE IF EXISTS tmp_patients;
DROP TABLE IF EXISTS tmp_visittmp_visits;
 
-- create tables
Line 98 ⟶ 2,300:
);
 
CREATE TABLE tmp_visittmp_visits(
PATIENT_ID INT,
VISIT_DATE DATE,
Line 104 ⟶ 2,306:
);
 
-- load data from csv files into tables
/*
-- Note: for LOAD DATA LOCAL to work you have to enablerequires `local-infile` enabled on both the client and server else you get error "#1148 command is not allowed.."
LOAD DATA LOCAL INFILE '/home/csv/patients.csv' INTO TABLE `tmp_patients` FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' IGNORE 1 LINES;
-- else you'll get the error "#1148 command is not allowed.."
LOAD DATA LOCAL INFILE '/home/csvimportcsv/patientsvisits.csv' INTO TABLE `tmp_visits` FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' IGNORE 1 LINES;
INTO TABLE `tmp_patients`
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
IGNORE 1 LINES;
 
LOAD DATA LOCAL INFILE '/home/csvimport/visits.csv'
INTO TABLE `tmp_visit`
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
IGNORE 1 LINES;
*/
 
-- insertload recordsdata intohard tablescoded
INSERT INTO tmp_patients(PATIENT_ID, LASTNAME)
VALUES
Line 130 ⟶ 2,322:
(5005, 'Kurtz');
 
INSERT INTO tmp_visittmp_visits(PATIENT_ID, VISIT_DATE, SCORE)
VALUES
(2002, '2020-09-10', 6.8),
Line 150 ⟶ 2,342:
FROM
tmp_patients p
LEFT JOIN tmp_visittmp_visits v
ON v.PATIENT_ID = p.PATIENT_ID
GROUP BY
Line 156 ⟶ 2,348:
p.LASTNAME
ORDER BY
p.PATIENT_ID;</syntaxhighlight>
{{out}}
</lang>
<pre>
PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG
1001 Hopper 2020-11-19 17.4 5.80
2002 Gosling 2020-10-08 6.8 6.80
3003 Kemeny 2020-11-12 NULL NULL
4004 Wirth 2020-11-05 15.4 7.70
5005 Kurtz NULL NULL NULL
</pre>
 
=={{header|Transd}}==
<syntaxhighlight lang="scheme">#lang transd
 
MainModule: {
 
tbl: `1001,Hopper
4004,Wirth
3003,Kemeny
2002,Gosling
5005,Kurtz`,
 
tbl1: `
2002,2020-09-10,6.8
1001,2020-09-17,5.5
4004,2020-09-24,8.4
2002,2020-10-08,
1001,,6.6
3003,2020-11-12,
4004,2020-11-05,7.0
1001,2020-11-19,5.3`,
 
cols: `@key_PATIENT_ID:Int,
LASTNAME:String,
VISIT_DATE:DateTime,
SCORE:Double,
SCORE_AVG:Double,
NUM_VISITS:Int`,
 
Record : typealias(Tuple<Int DateTime Double>()),
 
_start: (λ (with base TSDBase()
(load-table base tbl colNames: cols)
(build-index base "PATIENT_ID")
 
(with vizs Vector<Record>()
(load-table vizs tbl1 :mixedTypes fieldSep: "," rowSep: "\n" )
(for viz in vizs do
(tsd-query base
:update set:
(lambda PATIENT_ID Int() VISIT_DATE DateTime()
SCORE Double() SCORE_AVG Double() NUM_VISITS Int()
(+= NUM_VISITS 1)
(set VISIT_DATE (get viz 1))
(set SCORE (+ SCORE (get viz 2)))
(set SCORE_AVG (/ SCORE NUM_VISITS)))
where: (lambda PATIENT_ID Int() (eq PATIENT_ID (get viz 0))))
))
(with cols ["PATIENT_ID","LASTNAME","VISIT_DATE","SCORE","SCORE_AVG"]
(with recs (tsd-query base select: cols
as: [[Int(), String(), DateTime(), Double(), Double()]]
where: (lambda PATIENT_ID Int() true)
)
(for i in cols do (textout width: 10 i "|")) (lout "")
(for rec in recs do
(for-each rec (λ i :Data() (textout width: 10 i "|" )))
(lout ""))
))
))
}</syntaxhighlight>{{out}}
<pre>
PATIENT_ID| LASTNAME|VISIT_DATE| SCORE| SCORE_AVG|
1001| Hopper|2020-11-19| 17.4| 5.8|
4004| Wirth| 2020-11-5| 15.4| 7.7|
3003| Kemeny|2020-11-12| 0| 0|
2002| Gosling| 2020-10-8| 6.8| 3.4|
5005| Kurtz| | 0| 0|
</pre>
 
=={{header|TutorialD}}==
<syntaxhighlight lang="text">BEGIN;
TYPE Date UNION;
TYPE DateValid IS {Date POSSREP {year INTEGER, month INTEGER, day INTEGER}};
TYPE DateNone IS {Date POSSREP {}};
TYPE DateUnknown IS {Date POSSREP {}};
END;
 
VAR patient REAL RELATION {id INT, lastname CHAR} KEY {id};
 
INSERT patient RELATION
{TUPLE {id 1001, lastname 'Hopper'},
TUPLE {id 4004, lastname 'Wirth'},
TUPLE {id 3003, lastname 'Kemeny'},
TUPLE {id 2002, lastname 'Gosling'},
TUPLE {id 5005, lastname 'Kurtz'}
};
 
VAR visit REAL RELATION {id INT, date Date, score RATIONAL} KEY {id, date};
 
INSERT visit RELATION
{
TUPLE {id 2002, date DateValid(2020,09,10), score 6.8},
TUPLE {id 1001, date DateValid(2020,09,17), score 5.5},
TUPLE {id 4004, date DateValid(2020,09,24), score 8.4},
TUPLE {id 2002, date DateValid(2020,10,08), score NAN},
TUPLE {id 1001, date DateNone(), score 6.6},
TUPLE {id 3003, date DateValid(2020,11,12), score NAN},
TUPLE {id 4004, date DateValid(2020,11,05), score 7.0},
TUPLE {id 1001, date DateValid(2020,11,19), score 5.3}
};
 
((SUMMARIZE (visit WHERE score>0.0) BY {id}: {sumscore := SUM(score), avgscore := AVG(score)}) UNION
(EXTEND (patient {id} MINUS ((visit WHERE score>0.0) {id})): {sumscore := NaN, avgscore := NaN})) JOIN
(SUMMARIZE visit BY {id}: {maxdate := MAX(date)} UNION
(EXTEND (patient {id} MINUS (visit {id})): {maxdate := DateUnknown()})) JOIN
patient</syntaxhighlight>
{{out}}
<pre>
id sumscore avgscore maxdate lastname
1001 17.4 5.8 DateValid(2020, 11, 19) Hopper
2002 6.8 6.8 DateValid(2020, 10, 8) Gosling
4004 15.4 7.7 DateValid(2020, 11, 5) Wirth
3003 NaN NaN DateValid(2020, 11, 12) Kemeny
5005 NaN NaN DateUnknown() Kurtz</pre>
 
=={{header|Wren}}==
{{libheader|Wren-sort}}
{{libheader|Wren-fmt}}
<syntaxhighlight lang="wren">import "./fmt" for Fmt
import "./sort" for Sort
 
class Patient {
construct new(id, lastName) {
_id = id
_lastName = lastName
if (!__dir) __dir = {}
__dir[id] = lastName
if (!__ids) {
__ids = [id]
} else {
__ids.add(id)
Sort.insertion(__ids)
}
}
 
id { _id }
lastName { _lastName }
 
// maps an id to a lastname
static dir { __dir }
 
// maintains a sorted list of ids
static ids { __ids }
}
 
class Visit {
construct new(id, date, score) {
_id = id
_date = date || "0000-00-00"
_score = score
if (!__dir) __dir = {}
if (!__dir[id]) {
__dir[id] = [ [_date], [score] ]
} else {
__dir[id][0].add(_date)
__dir[id][1].add(score)
}
}
 
id { _id }
date { _date }
score { _score }
 
// maps an id to lists of dates and scores
static dir { __dir }
}
 
class Merge {
construct new(id) {
_id = id
}
 
id { _id }
lastName { Patient.dir[_id] }
dates { Visit.dir[_id][0] }
scores { Visit.dir[_id][1] }
lastVisit { Sort.merge(dates)[-1] }
 
scoreSum { scores.reduce(0) { |acc, s| s ? acc + s : acc } }
scoreAvg { scoreSum / scores.count { |s| s } }
 
static print(merges) {
System.print("| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |")
var fmt = "| $d | $-7s | $s | $4s | $4s |"
for (m in merges) {
if (Visit.dir[m.id]) {
var lv = (m.lastVisit != "0000-00-00") ? m.lastVisit : " "
var ss = (m.scoreSum > 0) ? Fmt.f(4, m.scoreSum, 1) : " "
var sa = (!m.scoreAvg.isNan) ? Fmt.f(4, m.scoreAvg, 2) : " "
Fmt.print(fmt, m.id, m.lastName, lv, ss, sa)
} else {
Fmt.print(fmt, m.id, m.lastName, " ", " ", " ")
}
}
}
}
 
Patient.new(1001, "Hopper")
Patient.new(4004, "Wirth")
Patient.new(3003, "Kemeny")
Patient.new(2002, "Gosling")
Patient.new(5005, "Kurtz")
 
Visit.new(2002, "2020-09-10", 6.8)
Visit.new(1001, "2020-09-17", 5.5)
Visit.new(4004, "2020-09-24", 8.4)
Visit.new(2002, "2020-10-08", null)
Visit.new(1001, null , 6.6)
Visit.new(3003, "2020-11-12", null)
Visit.new(4004, "2020-11-05", 7.0)
Visit.new(1001, "2020-11-19", 5.3)
 
var merges = Patient.ids.map { |id| Merge.new(id) }.toList
Merge.print(merges)</syntaxhighlight>
 
{{out}}
<pre>
| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
| 1001 | Hopper | 2020-11-19 | 17.4 | 5.80 |
| 2002 | Gosling | 2020-10-08 | 6.8 | 6.80 |
| 3003 | Kemeny | 2020-11-12 | | |
| 4004 | Wirth | 2020-11-05 | 15.4 | 7.70 |
| 5005 | Kurtz | | | |
</pre>
2,136

edits