Merge and aggregate datasets: Difference between revisions

Added FreeBASIC
(J)
(Added FreeBASIC)
 
(27 intermediate revisions by 5 users not shown)
Line 71:
{{trans|Python: Stdlib csv only}}
 
<langsyntaxhighlight lang="11l">V patients_csv =
‘PATIENT_ID,LASTNAME
1001,Hopper
Line 125:
 
L(record) result
print(‘| ’record.map(r -> r.center(10)).join(‘ | ’)‘ |’)</langsyntaxhighlight>
 
{{out}}
Line 138:
 
=={{header|AutoHotkey}}==
<langsyntaxhighlight AutoHotkeylang="autohotkey">Merge_and_aggregate(patients, visits){
ID := [], LAST_VISIT := [], SCORE_SUM := [], VISIT := []
for i, line in StrSplit(patients, "`n", "`r"){
Line 161:
output .= ID "`t" name "`t" LAST_VISIT[id] "`t" SCORE_SUM[id] "`t" SCORE_SUM[id]/VISIT[id] "`n"
return output
}</langsyntaxhighlight>
Examples:<langsyntaxhighlight AutoHotkeylang="autohotkey">patients =
(
PATIENT_ID,LASTNAME
Line 186:
 
MsgBox % Merge_and_aggregate(patients, visits)
return</langsyntaxhighlight>
{{out}}
<pre>PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG
Line 196:
 
=={{header|AWK}}==
<syntaxhighlight lang="awk">
<lang AWK>
# syntax: GAWK -f MERGE_AND_AGGREGATE_DATASETS.AWK RC-PATIENTS.CSV RC-VISITS.CSV
# files may appear in any order
Line 235:
exit(0)
}
</syntaxhighlight>
</lang>
{{out}}
<pre>
Line 247:
=={{header|C++}}==
Uses C++20
<langsyntaxhighlight lang="cpp">#include <iostream>
#include <optional>
#include <ranges>
Line 323:
cout << " |\n";
}
}</langsyntaxhighlight>
 
{{out}}
Line 336:
</pre>
=={{header|C sharp}}==
<langsyntaxhighlight lang="csharp">using System;
using System.Collections.Generic;
using System.Globalization;
Line 408:
}
 
}</langsyntaxhighlight>
{{out}}
<pre>
Line 420:
=={{header|Datalog}}==
The following implementation is for Souffle. Souffle does not currently include dates and times in the base distribution, but it is straightforward to use C timestamps:
<langsyntaxhighlight Clang="c">// datetime.cpp
#include <ctime>
#include <cstdint>
Line 429:
return mktime(&tmInfo); // localtime
}
}</langsyntaxhighlight>
 
Rather than combine the summaries, the date and score summaries have been presented separately.
<langsyntaxhighlight lang="prolog">/* rosetta.dl */
#define NaN to_float("NaN")
.functor from_date(date:symbol) : number
Line 467:
score_mean = mean score: {Visit(id, _, score), score != NaN}.
.output SummaryDates
.output SummaryScores</langsyntaxhighlight>
 
Then this is called using:
<langsyntaxhighlight lang="bash">g++ -shared -fPIC datetime.cpp -o libfunctors.so
souffle -D- rosetta.dl</langsyntaxhighlight>
{{output}}
<pre>---------------
Line 490:
4004 Wirth 2020-11-05
===============</pre>
 
=={{header|FreeBASIC}}==
{{trans|C++}}
<syntaxhighlight lang="vbnet">Type Patient
ID As String
LastName As String
End Type
 
Type Visit
PatientID As String
Fecha As String
Score As Single
HasScore As Integer
End Type
 
Dim As Patient patients(5)
Dim As Visit visits(8)
 
patients(1).ID = "1001": patients(1).LastName = "Hopper"
patients(2).ID = "4004": patients(2).LastName = "Wirth"
patients(3).ID = "3003": patients(3).LastName = "Kemeny"
patients(4).ID = "2002": patients(4).LastName = "Gosling"
patients(5).ID = "5005": patients(5).LastName = "Kurtz"
 
visits(1).PatientID = "2002": visits(1).Fecha = "2020-09-10": visits(1).Score = 6.8: visits(1).HasScore = -1
visits(2).PatientID = "1001": visits(2).Fecha = "2020-09-17": visits(2).Score = 5.5: visits(2).HasScore = -1
visits(3).PatientID = "4004": visits(3).Fecha = "2020-09-24": visits(3).Score = 8.4: visits(3).HasScore = -1
visits(4).PatientID = "2002": visits(4).Fecha = "2020-10-08": visits(4).HasScore = 0
visits(5).PatientID = "1001": visits(5).Fecha = "" : visits(5).Score = 6.6: visits(5).HasScore = -1
visits(6).PatientID = "3003": visits(6).Fecha = "2020-11-12": visits(6).HasScore = 0
visits(7).PatientID = "4004": visits(7).Fecha = "2020-11-05": visits(7).Score = 7.0: visits(7).HasScore = -1
visits(8).PatientID = "1001": visits(8).Fecha = "2020-11-19": visits(8).Score = 5.3: visits(8).HasScore = -1
 
Print "| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |"
For i As Integer = 1 To 5
Dim As String lastVisit = ""
Dim As Single sum = 0
Dim As Integer numScores = 0
For j As Integer = 1 To 8
If patients(i).ID = visits(j).PatientID Then
If visits(j).HasScore Then
sum += visits(j).Score
numScores += 1
End If
If visits(j).Fecha > lastVisit Then
lastVisit = visits(j).Fecha
End If
End If
Next j
Print "| "; patients(i).ID; " | ";
Print Using "\ \ | \ \ | "; patients(i).LastName; lastVisit;
If numScores > 0 Then
Print Using "#######.# | #######.#"; sum; (sum / Csng(numScores));
Else
Print " | ";
End If
Print " |"
Next i
 
Sleep</syntaxhighlight>
{{out}}
<pre>| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
| 1001 | Hopper | 2020-11-19 | 17.4 | 5.8 |
| 4004 | Wirth | 2020-11-05 | 15.4 | 7.7 |
| 3003 | Kemeny | 2020-11-12 | | |
| 2002 | Gosling | 2020-10-08 | 6.8 | 6.8 |
| 5005 | Kurtz | | | |</pre>
 
=={{header|F_Sharp|F#}}==
Note that the scores are right justified to copy the task description. It would be more natural to leave them right justified.
<langsyntaxhighlight lang="fsharp">
// Merge and aggregate datasets. Nigel Galloway: January 6th., 2021
let rFile(fName)=seq{use n=System.IO.File.OpenText(fName)
Line 503 ⟶ 570:
let fG n g=let z=G.[n]|>Seq.sumBy(fun n->try float n.[2] with :? System.FormatException->0.0)
fN n g (G.[n]|>Seq.sort|>Seq.last).[1] (if z=0.0 then "" else string z) (if z=0.0 then "" else string(z/(float(Seq.length G.[n]))))
</syntaxhighlight>
</lang>
{{out}}
<pre>
Line 518 ⟶ 585:
=={{header|Go}}==
{{trans|Wren}}
<langsyntaxhighlight lang="go">package main
 
import (
Line 655 ⟶ 722:
}
mergePrint(merges)
}</langsyntaxhighlight>
 
{{out}}
Line 673 ⟶ 740:
Merging of fields and databases is defined as a monoid operation for corresponding types.
 
<langsyntaxhighlight lang="haskell">import Data.List
import Data.Maybe
import System.IO (readFile)
Line 738 ⟶ 805:
where
go [] = Nothing
go s = Just $ drop 1 <$> span (/= ch) s</langsyntaxhighlight>
 
<pre>let patients = readDB <$> readFile "patients.csv"
Line 765 ⟶ 832:
===Pretty tabulation===
 
<langsyntaxhighlight lang="haskell">
tabulateDB (DB ps) header cols = intercalate "|" <$> body
where
Line 787 ⟶ 854:
, \p -> case scores p of {[] -> []; s -> show (mean s)} ]
 
mean lst = sum lst / genericLength lst</langsyntaxhighlight>
 
<pre>*Main> main
Line 796 ⟶ 863:
4004 | Wirth | 2020-11-05 | 15.4 | 7.7
5005 | Kurtz | | | </pre>
 
=={{header|Harbour}}==
 
Harbour does not have special values for NA or NaN, and missing numerical values are represented as zeros. In the following, we have used -999 for missing scores.
 
<syntaxhighlight lang="xbase">
#xcommand INSERT INTO <table> ( <uField1>[, <uFieldN> ] ) VALUE ( <uVal1>[, <uValN> ] ) => ;
<table>->(dbAppend()); <table>-><uField1> := <uVal1> [; <table>-><uFieldN> := <uValN>]
#xcommand INSERT INTO <table> ( <uFieldList,...> ) VALUES ( <uValList1,...> ) [, ( <uValListN,...> )] => ;
INSERT INTO <table> ( <uFieldList> ) VALUE ( <uValList1> ) ;
[; INSERT INTO <table> ( <uFieldList> ) VALUE ( <uValListN> )]
&& Singular cases (so we can use VALUES for all instances)
#xcommand INSERT INTO <table> (<uField>) VALUE (<uVal>) => ;
<table>->(dbAppend()); <table>-><uField> := <uVal>
#xcommand INSERT INTO <table> ( <uFieldList,...> ) VALUES ( <uValList,...> ) => ;
INSERT INTO <table> ( <uFieldList> ) VALUE ( <uValList> )
 
PROCEDURE Main()
LOCAL pStruct, vStruct, rStruct, xCurId, aAgg
 
SET DATE FORMAT "yyyy-mm-dd"
 
&& create and populate the patient table
pStruct := {{"patient_id", "n", 8, 0}, {"lastname", "c", 10, 0 }}
dbCreate( "patient", pStruct,, .T., "patient" )
INSERT INTO patient (patient_id, lastname) VALUES (1001, "Hopper"), (4004, "Wirth"), ;
(3003, "Kemeny"), (2002, "Gosling"), (5005, "Kurtz")
INDEX ON patient_id TO pat_id
&& create and populate the visit table
vStruct := {{"patient_id", "n", 8, 0}, {"visit_date", "d", 10, 0}, {"score", "n", 8, 1}}
dbCreate( "visit", vStruct,, .T., "visit" )
INSERT INTO visit (patient_id, visit_date, score) VALUES (2002, ctod("2020-09-10"), 6.8), ;
(1001, ctod("2020-09-17"), 5.5), (4004, ctod("2020-09-24"), 8.4), ;
(2002, ctod("2020-10-08"), -999), (1001, ctod("1900-01-01"), 6.6), ;
(3003, ctod("2020-11-12"), -999), (4004, ctod("2020-11-05"), 7.0), ;
(1001, ctod("2020-11-19"), 5.3)
INDEX ON patient_id TO visit_id
 
&& create the result table
rStruct := { {"patient_id", "n", 8, 0}, ;
{"n", "i", 8, 0}, {"sum_score", "n", 8, 1}, ;
{"avg_score", "n", 8, 1}, {"max_date", "d", 10, 0}}
dbCreate("report", rStruct,, .T., "report")
 
SELECT visit
DO WHILE ! Eof()
xCurId := patient_id && grouping variable
aAgg := {0, 0, 0.0, ctod("1900-01-01")} && initial values
DO WHILE ! Eof() .AND. xCurId == patient_id
aAgg := {1+aAgg[1], iif(score==-999,aAgg[2],1+aAgg[2]), ;
iif(score==-999, aAgg[3], score+aAgg[3]), max(visit_date, aAgg[4])} && update
SKIP
ENDDO
INSERT INTO report (patient_id, n, sum_score, avg_score, max_date) ;
VALUES (xCurId, aAgg[1], aAgg[3], aAgg[3]/aAgg[2], aAgg[4])
ENDDO
 
SELECT report
INDEX ON patient_id TO report_id
 
SELECT patient
SET RELATION TO patient_id INTO report
? "NUM", "PATIENT_ID", "LASTNAME", "N", "SUM_SCORE", "AVG_SCORE", "MAX_DATE"
LIST patient_id, lastname, report->n, report->sum_score, report->avg_score, report->max_date
 
RETURN</syntaxhighlight>
 
With output:
 
<pre>NUM PATIENT_ID LASTNAME N SUM_SCORE AVG_SCORE MAX_DATE
1 1001 Hopper 3 17.4 5.8 2020-11-19
4 2002 Gosling 2 6.8 6.8 2020-10-08
3 3003 Kemeny 1 0.0 0.0 2020-11-12
2 4004 Wirth 2 15.4 7.7 2020-11-05
5 5005 Kurtz 0 0.0 0.0 - - </pre>
 
=={{header|J}}==
Line 802 ⟶ 945:
 
In other words, we can set things up like this:
<langsyntaxhighlight Jlang="j">NB. setup:
require'jd pacman'
load JDP,'tools/csv_load.ijs'
Line 835 ⟶ 978:
csvload 'visits';1
 
jd'ref patients PATIENTID visits PATIENTID'</langsyntaxhighlight>
 
And, then we can run our query:
 
<langsyntaxhighlight Jlang="j">require'jd'
 
echo jd {{)n
Line 853 ⟶ 996:
p:patients,
v:p.visits
}} -.LF</langsyntaxhighlight>
 
Which displays this result:
Line 865 ⟶ 1,008:
│5005 │5005 │Kurtz │? │ 0 │ 0 │
└───────────┴──────────┴────────┴──────────┴─────────┴─────────┘</pre>
 
Another approach would be to use J's csv library:
 
<syntaxhighlight lang="j">require'csv'
patients=: fixcsv {{)n
PATIENTID,LASTNAME
1001,Hopper
4004,Wirth
3003,Kemeny
2002,Gosling
5005,Kurtz
}}
 
visits=: fixcsv {{)n
PATIENTID,VISIT_DATE,SCORE
2002,2020-09-10,6.8
1001,2020-09-17,5.5
4004,2020-09-24,8.4
2002,2020-10-08,
1001,,6.6
3003,2020-11-12,
4004,2020-11-05,7.0
1001,2020-11-19,5.3
}}
 
task=: {{
P=. <@:>"1|:/:~}.patients
V=. <@:>"1|:/:~}.visits
id=. 0 {:: P
nm=. 1 {:: P
sel1=. (0 {:: P) e. 0 {:: V
sel2=. (~.0 {:: V) e. 0 {:: P NB. unnecessary for this example
exp=. sel1 #inv sel2 # ]
agg=. /.(&.:".)
vdt=. exp (0 {:: V) {:/. 1 {:: V
sum=. exp ":,.(0 {:: V) +//. 0". 2 {:: V
avg=. exp ":,.(0 {:: V) (+/%#)/. 0". 2 {:: V
labels=. ;:'PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG'
labels,:id;nm;vdt;sum;avg
}}</syntaxhighlight>
 
Here:
<syntaxhighlight lang="j"> task''
┌──────────┬────────┬──────────┬─────────┬─────────┐
│PATIENT_ID│LASTNAME│LAST_VISIT│SCORE_SUM│SCORE_AVG│
├──────────┼────────┼──────────┼─────────┼─────────┤
│1001 │Hopper │2020-11-19│17.4 │5.8 │
│2002 │Gosling │2020-10-08│ 6.8 │3.4 │
│3003 │Kemeny │2020-11-12│ 0 │ 0 │
│4004 │Wirth │2020-11-05│15.4 │7.7 │
│5005 │Kurtz │ │ │ │
└──────────┴────────┴──────────┴─────────┴─────────┘</syntaxhighlight>
 
If the empty score in visits was a display concern, we might instead do it this way:
 
<syntaxhighlight lang="j">task=: {{
P=. <@:>"1|:/:~}.patients
V=. <@:>"1|:/:~}.visits
id=. 0 {:: P
nm=. 1 {:: P
sel1=. (0 {:: P) e. 0 {:: V
sel2=. (~.0 {:: V) e. 0 {:: P NB. unnecessary for this example
exp=. sel1 #inv sel2 # ]
agg=. /.(&.:".)
vdt=. exp (0 {:: V) {:/. 1 {:: V
sel3=. (0 {:: V) +.//. 2 *@#@{::"1 }.visits
exp2=: [:exp sel3 #inv sel3 #]
sum=. exp2 ":,.(0 {:: V) +//. 0". 2 {:: V
avg=. exp2 ":,.(0 {:: V) (+/%#)/. 0". 2 {:: V
labels=. ;:'PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG'
labels,:id;nm;vdt;sum;avg
}}</syntaxhighlight>
 
Which gives us:
 
<syntaxhighlight lang="j"> task''
┌──────────┬────────┬──────────┬─────────┬─────────┐
│PATIENT_ID│LASTNAME│LAST_VISIT│SCORE_SUM│SCORE_AVG│
├──────────┼────────┼──────────┼─────────┼─────────┤
│1001 │Hopper │2020-11-19│17.4 │5.8 │
│2002 │Gosling │2020-10-08│ 6.8 │3.4 │
│3003 │Kemeny │2020-11-12│ │ │
│4004 │Wirth │2020-11-05│15.4 │7.7 │
│5005 │Kurtz │ │ │ │
└──────────┴────────┴──────────┴─────────┴─────────┘</syntaxhighlight>
 
=={{header|Java}}==
<syntaxhighlight lang="java">
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.DoubleSummaryStatistics;
import java.util.List;
 
public final class MergeAndAggregateDatasets {
 
public static void main(String[] args) {
List<Patient> patients = Arrays.asList(
new Patient("1001", "Hopper"),
new Patient("4004", "Wirth"),
new Patient("3003", "Kemeny"),
new Patient("2002", "Gosling"),
new Patient("5005", "Kurtz") );
 
List<Visit> visits = Arrays.asList(
new Visit("2002", "2020-09-10", 6.8),
new Visit("1001", "2020-09-17", 5.5),
new Visit("4004", "2020-09-24", 8.4),
new Visit("2002", "2020-10-08", null),
new Visit("1001", "" , 6.6),
new Visit("3003", "2020-11-12", null),
new Visit("4004", "2020-11-05", 7.0),
new Visit("1001", "2020-11-19", 5.3) );
Collections.sort(patients, Comparator.comparing(Patient::patientID));
System.out.println("| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |");
for ( Patient patient : patients ) {
List<Visit> patientVisits = visits.stream().filter( v -> v.visitID == patient.patientID() ).toList();
String lastVisit = patientVisits.stream()
.map( v -> v.visitDate ).max(Comparator.naturalOrder()).orElseGet( () -> " None " );
DoubleSummaryStatistics statistics = patientVisits.stream()
.filter( v -> v.score != null ).mapToDouble(Visit::score).summaryStatistics();
double scoreSum = statistics.getSum();
double scoreAverage = statistics.getAverage();
String patientDetails = String.format("%12s%11s%13s%12.2f%12.2f",
patient.patientID, patient.lastName, lastVisit, scoreSum, scoreAverage);
System.out.println(patientDetails);
}
 
private static record Patient(String patientID, String lastName) {};
private static record Visit(String visitID, String visitDate, Double score) {};
 
}
 
}
</syntaxhighlight>
{{ out }}
<pre>
| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
1001 Hopper 2020-11-19 17.40 5.80
2002 Gosling 2020-10-08 6.80 6.80
3003 Kemeny 2020-11-12 0.00 0.00
4004 Wirth 2020-11-05 15.40 7.70
5005 Kurtz None 0.00 0.00
</pre>
 
=={{header|jq}}==
Line 875 ⟶ 1,163:
 
'''Ingesting CSV data'''
<syntaxhighlight lang="jq">
<lang jq>
# objectify/1 takes an array of atomic values as inputs, and packages
# these into an object with keys specified by the "headers" array and
Line 893 ⟶ 1,181:
([]; . + [ $row|objectify($headers) ]);
 
</syntaxhighlight>
</lang>
'''Aggregation functions'''
<syntaxhighlight lang="jq">
<lang jq>
# output {LAST_VISIT}
def LAST_VISIT($patient_id):
Line 905 ⟶ 1,193:
| {SCORE_SUM: add, count: length}
| {SCORE_SUM, SCORE_AVG: (if .SCORE_SUM and .count > 0 then .SCORE_SUM/.count else null end)};
</langsyntaxhighlight>'''The task'''<syntaxhighlight lang ="jq">
# Read the two tables:
INDEX($patients | [splits("\n")] | map(split(",")) | csv2jsonHelper[]; .PATIENT_ID) as $patients
Line 916 ⟶ 1,204:
# ... but display it as a sequence of JSON objects
| .[]
</syntaxhighlight>
</lang>
'''Invocation'''
 
Line 934 ⟶ 1,222:
=={{header|Julia}}==
{{trans|Python}}
<langsyntaxhighlight lang="julia">using CSV, DataFrames, Statistics
 
# load data from csv files
Line 973 ⟶ 1,261:
end
println(df_result)
</langsyntaxhighlight>{{out}}
<pre>
5×5 DataFrame
Line 987 ⟶ 1,275:
 
=={{header|Mathematica}}/{{header|Wolfram Language}}==
<langsyntaxhighlight Mathematicalang="mathematica">a = ImportString["PATIENT_ID,LASTNAME
1001,Hopper
4004,Wirth
Line 1,011 ⟶ 1,299:
"SCORE_SUM" -> If[DeleteMissing@#[[All, "SCORE"]] =!= {}, Total@DeleteMissing@#[[All, "SCORE"]], Missing[]],
"SCORE_AVG" -> If[DeleteMissing@#[[All, "SCORE"]] =!= {}, Mean@DeleteMissing@#[[All, "SCORE"]], Missing[]]|> & /@
gr // Dataset</langsyntaxhighlight>
{{out}}
<pre> PATIENT_ID LASTNAME VISIT_DATE SCORE_SUM SCORE_AVG
Line 1,020 ⟶ 1,308:
5005 5005 Kurtz \[LongDash] \[LongDash] \[LongDash]</pre>
=={{header|Mercury}}==
<langsyntaxhighlight Prologlang="prolog">:- module rosetta.
 
:- interface.
Line 1,052 ⟶ 1,340:
 
%% Utilities
:- pred bag_aggr(pred(T)::(pred(out) is nondet), pred(T,U,U)::pred(in,in,out) is det,
U::in, U::out) is det.
:- pred bag_count(pred(T)::(pred(out) is nondet), int::out) is det.
:- pred bag_sum(pred(float)::(pred(out) is nondet), float::out) is det.
:- pred bag_avg(pred(float)::(pred(out) is nondet), float::out) is det.
:- pred bag_max(pred(T)::(pred(out) is nondet), T::in, T::out) is det.
:- pred bag_max_date(pred(maybe_date)::(pred(out) is nondet), maybe_date::out) is det.
bag_aggr(Predicate, Aggregator, Initial, Result) :-
promise_equivalent_solutions[Result] (
unsorted_aggregate(Predicate, Aggregator, Initial, Result)).
bag_count(Predicate, Count) :-
bag_aggr(Predicate, (pred(_X::in,Y::in,Z::out) is det :- Z = Y+1), 0, Count).
promise_equivalent_solutions[Count] (
unsorted_aggregate(Predicate,
(pred(_X::in,Y::in,Z::out) is det :- Z is Y+1),
0, Count)).
bag_sum(Predicate, Sum) :-
bag_aggr(Predicate, (pred(X::in,Y::in,Z::out) is det :- Z = X+Y), 0.0, Sum).
promise_equivalent_solutions[Sum] (
unsorted_aggregate(Predicate,
(pred(X::in,Y::in,Z::out) is det :- Z is X+Y),
0.0, Sum)).
bag_avg(Predicate, Avg) :-
bag_count(Predicate, N),
bag_sum(Predicate, Sum),
(if N = 0 then Avg = nan else Avg is= Sum/float(N)).
bag_max(Predicate, Initial, Max) :-
bag_aggr(Predicate,
promise_equivalent_solutions [Max] (
(pred(X::in,Y::in,Z::out) is det :-
unsorted_aggregate(Predicate,
(predcompare(R,X::in,Y::in),Z::out) is det :-
(if R = (>) then Z = X else Z = Y)),
compare(R,X,Y),
Initial, Max).
(if R = (>) then Z = X else Z = Y)),
bag_max_date(Predicate, MaxDate) :-
Initial, Max)).
bag_max(Predicate, date(0,0,0), MaxDate1),
(if MaxDate1 = date(0,0,0) then MaxDate = no else MaxDate = MaxDate1).
 
main(!IO) :-
io.write_stringprint_line("{Id, Lastname, SumScores, AvgScores, MaxDate}:\n", !IO),
solutionsaggregate((pred({Id,Lastname,Sum,Avg,MaxDate}::out) is nondet :-
patient(Id,Lastname),
Scores = (pred(Score::out) is nondet :- visit(Id,_,Score), \+is_nan(Score)),
bag_avg(Scores, Avg),
bag_sum(Scores, Sum),
Dates = (pred(Date::out) is nondet :- visit(Id,Date,_), Date\=date(_,_,_)no),
bag_maxbag_max_date(Dates, date(0,0,0MaxDate), MaxDate1),
print_line,
(if MaxDate1 = date(0,0,0) then MaxDate = no else MaxDate = MaxDate1)),
Solutions!IO),.</syntaxhighlight>
foldl(io.write_line, Solutions, !IO).</lang>
{{out}}
<pre>{Id, Lastname, SumScores, AvgScores, MaxDate}:
Line 1,100 ⟶ 1,389:
=={{header|Nim}}==
===CSV files and tables===
<langsyntaxhighlight Nimlang="nim">import algorithm, parsecsv, strformat, strutils, tables
 
const NoValue = -1.0
Line 1,152 ⟶ 1,441:
let scoreSum = if count == 0: "" else: &"{sum:>4.1f}"
let scoreAvg = if count == 0: "" else: &"{sum / count.toFloat: >4.2f}"
echo &"| {id:^10} | {name:^10} | {lastVisit:^10} | {scoreSum:>7} | {scoreAvg:>6} |"</langsyntaxhighlight>
 
{{out}}
Line 1,165 ⟶ 1,454:
{{trans|Python}}
We use the high level standard library "db_sqlite" rather than the low level one "sqlite3".
<langsyntaxhighlight Nimlang="nim">import parseCsv, db_sqlite, sequtils, strutils
 
const FNames = ["patients1.csv", "patients2.csv"]
Line 1,217 ⟶ 1,506:
echo "| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |"
for row in result:
echo "| " & row.mapit(it.center(10)).join(" | ") & '|'</langsyntaxhighlight>
 
{{out}}
Line 1,228 ⟶ 1,517:
 
=={{header|Phix}}==
<!--<langsyntaxhighlight Phixlang="phix">(phixonline)-->
<span style="color: #008080;">constant</span> <span style="color: #000000;">patients_txt</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">split</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"""
PATIENT_ID,LASTNAME
Line 1,286 ⟶ 1,575:
<span style="color: #0000FF;">{</span><span style="color: #7060A8;">id</span><span style="color: #0000FF;">,</span><span style="color: #000000;">name</span><span style="color: #0000FF;">,</span><span style="color: #000000;">dt</span><span style="color: #0000FF;">,</span><span style="color: #000000;">scstr</span><span style="color: #0000FF;">,</span><span style="color: #000000;">avstr</span><span style="color: #0000FF;">})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<!--</langsyntaxhighlight>-->
{{out}}
<pre>
Line 1,300 ⟶ 1,589:
Weird requirement: "Use the most common libraries only when built-in functionality is not sufficient."
Not even a "use strict;" :)
<langsyntaxhighlight lang="perl">#!/usr/bin/perl
my $fmt = '| %-11s' x 5 . "|\n";
printf $fmt, qw( PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG);
Line 1,336 ⟶ 1,625:
3003,2020-11-12,
4004,2020-11-05,7.0
1001,2020-11-19,5.3</langsyntaxhighlight>
{{out}}
<pre>
Line 1,348 ⟶ 1,637:
=={{header|Prolog}}==
Implemented using SWI Prolog:
<langsyntaxhighlight lang="prolog">patient(1001,'Hopper').
patient(4004,'Wirth').
patient(3003,'Kemeny').
Line 1,383 ⟶ 1,672:
summaryScores(Id, Lastname, ScoreSum, ScoreMean),
writeln(summaryScores(Id, Lastname, ScoreSum, ScoreMean)),
fail.</langsyntaxhighlight>
{{output}}
<pre>summaryDates(1001,Hopper,2020-11-19)
Line 1,394 ⟶ 1,683:
false.</pre>
Implemented using XSB Prolog (which allows for user-defined aggregates):
<langsyntaxhighlight lang="prolog">:- import bagMax/2, bagCount/2, bagSum/2, bagReduce/4 from aggregs.
:- import julian_date/7, date_string/3 from iso8601.
:- import load_csv/2, add_cvt_type_hook/2 from proc_files.
Line 1,442 ⟶ 1,731:
sum(X,Y,Z) :- Z is X+Y.
:- hilog successor.
successor(X,_Y,Z) :- Z is X+1.</langsyntaxhighlight>
 
=={{header|PureBasic}}==
<langsyntaxhighlight PureBasiclang="purebasic">Structure Person
Name$
EndStructure
Line 1,505 ⟶ 1,794:
PrintN("") : scs=0 : c=0
Next
Input()</langsyntaxhighlight>
{{out}}
<pre>| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
Line 1,517 ⟶ 1,806:
 
===Python: Using pandas library===
<langsyntaxhighlight Pythonlang="python"># to install pandas library go to cmd prompt and type:
# cd %USERPROFILE%\AppData\Local\Programs\Python\Python38-32\Scripts\
# pip install pandas
Line 1,561 ⟶ 1,850:
df_result = df_group.agg({'VISIT_DATE': 'max', 'SCORE': [lambda x: x.sum(min_count=1),'mean']})
 
print(df_result)</langsyntaxhighlight>
<pre>
PATIENT_ID LASTNAME LAST_VISIT SCORE
Line 1,575 ⟶ 1,864:
Using only standard libraries and input from csv files.
 
<langsyntaxhighlight lang="python">import csv
 
fnames = 'patients.csv patients_visits.csv'.split()
Line 1,610 ⟶ 1,899:
#%%
for record in result:
print(f"| {' | '.join(f'{r:^10}' for r in record)} |")</langsyntaxhighlight>
 
{{out}}
Line 1,625 ⟶ 1,914:
(The SQL SELECT statement is modelled on that of the SQL entry elsewhere on this page).
 
<langsyntaxhighlight lang="python">import sqlite3
import csv
 
Line 1,689 ⟶ 1,978:
result = join_tables_and_group(conn)
for record in result:
print(f"| {' | '.join(f'{str(r):^10}' for r in record)} |")</langsyntaxhighlight>
 
{{out}}
Line 1,700 ⟶ 1,989:
 
=={{header|R}}==
<langsyntaxhighlight Rlang="r"># load data from csv files
# setwd("C:\Temp\csv\")
# df_patient <- read.csv(file="patients.csv", header = TRUE, sep = ",")
Line 1,741 ⟶ 2,030:
df_result <- merge(df_patient, df_agg, by = 'PATIENT_ID', all.x = TRUE)
 
print(df_result)</langsyntaxhighlight>
{{out}}
<pre>
Line 1,753 ⟶ 2,042:
 
=={{header|Raku}}==
<syntaxhighlight lang="raku" perl6line>my @names = map { ( <PATIENT_ID LASTNAME> Z=> .list ).hash },
( 1001, 'Hopper' ),
( 4004, 'Wirth' ),
Line 1,789 ⟶ 2,078:
my @out_field_names = <PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG>;
my @rows = @result.sort(*.<PATIENT_ID>).map(*.{@out_field_names});
say .map({$_ // ''}).fmt('%-10s', ' | ') for @out_field_names, |@rows;</langsyntaxhighlight>
{{out}}
<pre>
Line 1,801 ⟶ 2,090:
 
=={{header|REXX}}==
<langsyntaxhighlight lang="rexx">/* REXX */
patients='patients.csv'
l=linein(patients)
Line 1,881 ⟶ 2,170:
swl=swl wa.i
End
Return strip(swl)</langsyntaxhighlight>
{{out}}
<pre>| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |
Line 1,890 ⟶ 2,179:
| 5005 | Kurtz | | | |</pre>
=={{header|SAS}}==
<langsyntaxhighlight lang="sas"> %let datefmt=E8601DA10.;
data patient;
infile "patient.csv" dsd dlm=',';
Line 1,909 ⟶ 2,198:
avg(score) as avg_score from visit group by id)
natural right join patient
order by id;</langsyntaxhighlight>
{{out}}
<pre>: id lastname max_date sum_score avg_score
Line 1,923 ⟶ 2,212:
Note: the PATIENT_ID 5005 is missing from the result, because MATCH FILES cannot add rows for missing keys, i.e. AFAIK a "left join" is not possible is SPSS.
 
<langsyntaxhighlight SPSSlang="spss">* set working directory to location of .csv files
 
CD 'C:\Temp\csv\'.
Line 1,988 ⟶ 2,277:
/last_visit = MAX(VISIT_DATE)
/score_avg = MEAN(SCORE)
/score_sum = SUM(SCORE).</langsyntaxhighlight>
 
{{out}}
Line 2,001 ⟶ 2,290:
=={{header|SQL}}==
 
<langsyntaxhighlight SQLlang="sql">-- drop tables
DROP TABLE IF EXISTS tmp_patients;
DROP TABLE IF EXISTS tmp_visits;
Line 2,059 ⟶ 2,348:
p.LASTNAME
ORDER BY
p.PATIENT_ID;</langsyntaxhighlight>
{{out}}
<pre>
Line 2,071 ⟶ 2,360:
 
=={{header|Transd}}==
<langsyntaxhighlight lang="scheme">#lang transd
 
MainModule: {
Line 2,129 ⟶ 2,418:
))
))
}</langsyntaxhighlight>{{out}}
<pre>
PATIENT_ID| LASTNAME|VISIT_DATE| SCORE| SCORE_AVG|
Line 2,140 ⟶ 2,429:
 
=={{header|TutorialD}}==
<syntaxhighlight lang="text">BEGIN;
TYPE Date UNION;
TYPE DateValid IS {Date POSSREP {year INTEGER, month INTEGER, day INTEGER}};
Line 2,175 ⟶ 2,464:
(SUMMARIZE visit BY {id}: {maxdate := MAX(date)} UNION
(EXTEND (patient {id} MINUS (visit {id})): {maxdate := DateUnknown()})) JOIN
patient</langsyntaxhighlight>
{{out}}
<pre>
Line 2,188 ⟶ 2,477:
{{libheader|Wren-sort}}
{{libheader|Wren-fmt}}
<langsyntaxhighlight ecmascriptlang="wren">import "./fmt" for Fmt
import "./sort" for Sort
 
class Patient {
Line 2,283 ⟶ 2,572:
 
var merges = Patient.ids.map { |id| Merge.new(id) }.toList
Merge.print(merges)</langsyntaxhighlight>
 
{{out}}
2,122

edits