Merge and aggregate datasets: Difference between revisions
Content added Content deleted
Thundergnat (talk | contribs) m (syntax highlighting fixup automation) |
|||
Line 71: | Line 71: | ||
{{trans|Python: Stdlib csv only}} |
{{trans|Python: Stdlib csv only}} |
||
< |
<syntaxhighlight lang="11l">V patients_csv = |
||
‘PATIENT_ID,LASTNAME |
‘PATIENT_ID,LASTNAME |
||
1001,Hopper |
1001,Hopper |
||
Line 125: | Line 125: | ||
L(record) result |
L(record) result |
||
print(‘| ’record.map(r -> r.center(10)).join(‘ | ’)‘ |’)</ |
print(‘| ’record.map(r -> r.center(10)).join(‘ | ’)‘ |’)</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 138: | Line 138: | ||
=={{header|AutoHotkey}}== |
=={{header|AutoHotkey}}== |
||
< |
<syntaxhighlight lang="autohotkey">Merge_and_aggregate(patients, visits){ |
||
ID := [], LAST_VISIT := [], SCORE_SUM := [], VISIT := [] |
ID := [], LAST_VISIT := [], SCORE_SUM := [], VISIT := [] |
||
for i, line in StrSplit(patients, "`n", "`r"){ |
for i, line in StrSplit(patients, "`n", "`r"){ |
||
Line 161: | Line 161: | ||
output .= ID "`t" name "`t" LAST_VISIT[id] "`t" SCORE_SUM[id] "`t" SCORE_SUM[id]/VISIT[id] "`n" |
output .= ID "`t" name "`t" LAST_VISIT[id] "`t" SCORE_SUM[id] "`t" SCORE_SUM[id]/VISIT[id] "`n" |
||
return output |
return output |
||
}</ |
}</syntaxhighlight> |
||
Examples:< |
Examples:<syntaxhighlight lang="autohotkey">patients = |
||
( |
( |
||
PATIENT_ID,LASTNAME |
PATIENT_ID,LASTNAME |
||
Line 186: | Line 186: | ||
MsgBox % Merge_and_aggregate(patients, visits) |
MsgBox % Merge_and_aggregate(patients, visits) |
||
return</ |
return</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG |
<pre>PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG |
||
Line 196: | Line 196: | ||
=={{header|AWK}}== |
=={{header|AWK}}== |
||
<syntaxhighlight lang="awk"> |
|||
<lang AWK> |
|||
# syntax: GAWK -f MERGE_AND_AGGREGATE_DATASETS.AWK RC-PATIENTS.CSV RC-VISITS.CSV |
# syntax: GAWK -f MERGE_AND_AGGREGATE_DATASETS.AWK RC-PATIENTS.CSV RC-VISITS.CSV |
||
# files may appear in any order |
# files may appear in any order |
||
Line 235: | Line 235: | ||
exit(0) |
exit(0) |
||
} |
} |
||
</syntaxhighlight> |
|||
</lang> |
|||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 247: | Line 247: | ||
=={{header|C++}}== |
=={{header|C++}}== |
||
Uses C++20 |
Uses C++20 |
||
< |
<syntaxhighlight lang="cpp">#include <iostream> |
||
#include <optional> |
#include <optional> |
||
#include <ranges> |
#include <ranges> |
||
Line 323: | Line 323: | ||
cout << " |\n"; |
cout << " |\n"; |
||
} |
} |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 336: | Line 336: | ||
</pre> |
</pre> |
||
=={{header|C sharp}}== |
=={{header|C sharp}}== |
||
< |
<syntaxhighlight lang="csharp">using System; |
||
using System.Collections.Generic; |
using System.Collections.Generic; |
||
using System.Globalization; |
using System.Globalization; |
||
Line 408: | Line 408: | ||
} |
} |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 420: | Line 420: | ||
=={{header|Datalog}}== |
=={{header|Datalog}}== |
||
The following implementation is for Souffle. Souffle does not currently include dates and times in the base distribution, but it is straightforward to use C timestamps: |
The following implementation is for Souffle. Souffle does not currently include dates and times in the base distribution, but it is straightforward to use C timestamps: |
||
< |
<syntaxhighlight lang="c">// datetime.cpp |
||
#include <ctime> |
#include <ctime> |
||
#include <cstdint> |
#include <cstdint> |
||
Line 429: | Line 429: | ||
return mktime(&tmInfo); // localtime |
return mktime(&tmInfo); // localtime |
||
} |
} |
||
}</ |
}</syntaxhighlight> |
||
Rather than combine the summaries, the date and score summaries have been presented separately. |
Rather than combine the summaries, the date and score summaries have been presented separately. |
||
< |
<syntaxhighlight lang="prolog">/* rosetta.dl */ |
||
#define NaN to_float("NaN") |
#define NaN to_float("NaN") |
||
.functor from_date(date:symbol) : number |
.functor from_date(date:symbol) : number |
||
Line 467: | Line 467: | ||
score_mean = mean score: {Visit(id, _, score), score != NaN}. |
score_mean = mean score: {Visit(id, _, score), score != NaN}. |
||
.output SummaryDates |
.output SummaryDates |
||
.output SummaryScores</ |
.output SummaryScores</syntaxhighlight> |
||
Then this is called using: |
Then this is called using: |
||
< |
<syntaxhighlight lang="bash">g++ -shared -fPIC datetime.cpp -o libfunctors.so |
||
souffle -D- rosetta.dl</ |
souffle -D- rosetta.dl</syntaxhighlight> |
||
{{output}} |
{{output}} |
||
<pre>--------------- |
<pre>--------------- |
||
Line 493: | Line 493: | ||
=={{header|F_Sharp|F#}}== |
=={{header|F_Sharp|F#}}== |
||
Note that the scores are right justified to copy the task description. It would be more natural to leave them right justified. |
Note that the scores are right justified to copy the task description. It would be more natural to leave them right justified. |
||
< |
<syntaxhighlight lang="fsharp"> |
||
// Merge and aggregate datasets. Nigel Galloway: January 6th., 2021 |
// Merge and aggregate datasets. Nigel Galloway: January 6th., 2021 |
||
let rFile(fName)=seq{use n=System.IO.File.OpenText(fName) |
let rFile(fName)=seq{use n=System.IO.File.OpenText(fName) |
||
Line 503: | Line 503: | ||
let fG n g=let z=G.[n]|>Seq.sumBy(fun n->try float n.[2] with :? System.FormatException->0.0) |
let fG n g=let z=G.[n]|>Seq.sumBy(fun n->try float n.[2] with :? System.FormatException->0.0) |
||
fN n g (G.[n]|>Seq.sort|>Seq.last).[1] (if z=0.0 then "" else string z) (if z=0.0 then "" else string(z/(float(Seq.length G.[n])))) |
fN n g (G.[n]|>Seq.sort|>Seq.last).[1] (if z=0.0 then "" else string z) (if z=0.0 then "" else string(z/(float(Seq.length G.[n])))) |
||
</syntaxhighlight> |
|||
</lang> |
|||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 518: | Line 518: | ||
=={{header|Go}}== |
=={{header|Go}}== |
||
{{trans|Wren}} |
{{trans|Wren}} |
||
< |
<syntaxhighlight lang="go">package main |
||
import ( |
import ( |
||
Line 655: | Line 655: | ||
} |
} |
||
mergePrint(merges) |
mergePrint(merges) |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 673: | Line 673: | ||
Merging of fields and databases is defined as a monoid operation for corresponding types. |
Merging of fields and databases is defined as a monoid operation for corresponding types. |
||
< |
<syntaxhighlight lang="haskell">import Data.List |
||
import Data.Maybe |
import Data.Maybe |
||
import System.IO (readFile) |
import System.IO (readFile) |
||
Line 738: | Line 738: | ||
where |
where |
||
go [] = Nothing |
go [] = Nothing |
||
go s = Just $ drop 1 <$> span (/= ch) s</ |
go s = Just $ drop 1 <$> span (/= ch) s</syntaxhighlight> |
||
<pre>let patients = readDB <$> readFile "patients.csv" |
<pre>let patients = readDB <$> readFile "patients.csv" |
||
Line 765: | Line 765: | ||
===Pretty tabulation=== |
===Pretty tabulation=== |
||
< |
<syntaxhighlight lang="haskell"> |
||
tabulateDB (DB ps) header cols = intercalate "|" <$> body |
tabulateDB (DB ps) header cols = intercalate "|" <$> body |
||
where |
where |
||
Line 787: | Line 787: | ||
, \p -> case scores p of {[] -> []; s -> show (mean s)} ] |
, \p -> case scores p of {[] -> []; s -> show (mean s)} ] |
||
mean lst = sum lst / genericLength lst</ |
mean lst = sum lst / genericLength lst</syntaxhighlight> |
||
<pre>*Main> main |
<pre>*Main> main |
||
Line 802: | Line 802: | ||
In other words, we can set things up like this: |
In other words, we can set things up like this: |
||
< |
<syntaxhighlight lang="j">NB. setup: |
||
require'jd pacman' |
require'jd pacman' |
||
load JDP,'tools/csv_load.ijs' |
load JDP,'tools/csv_load.ijs' |
||
Line 835: | Line 835: | ||
csvload 'visits';1 |
csvload 'visits';1 |
||
jd'ref patients PATIENTID visits PATIENTID'</ |
jd'ref patients PATIENTID visits PATIENTID'</syntaxhighlight> |
||
And, then we can run our query: |
And, then we can run our query: |
||
< |
<syntaxhighlight lang="j">require'jd' |
||
echo jd {{)n |
echo jd {{)n |
||
Line 853: | Line 853: | ||
p:patients, |
p:patients, |
||
v:p.visits |
v:p.visits |
||
}} -.LF</ |
}} -.LF</syntaxhighlight> |
||
Which displays this result: |
Which displays this result: |
||
Line 868: | Line 868: | ||
Another approach would be to use J's csv library: |
Another approach would be to use J's csv library: |
||
< |
<syntaxhighlight lang="j">require'csv' |
||
patients=: fixcsv {{)n |
patients=: fixcsv {{)n |
||
PATIENTID,LASTNAME |
PATIENTID,LASTNAME |
||
Line 904: | Line 904: | ||
labels=. ;:'PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG' |
labels=. ;:'PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG' |
||
labels,:id;nm;vdt;sum;avg |
labels,:id;nm;vdt;sum;avg |
||
}}</ |
}}</syntaxhighlight> |
||
Here: |
Here: |
||
< |
<syntaxhighlight lang="j"> task'' |
||
┌──────────┬────────┬──────────┬─────────┬─────────┐ |
┌──────────┬────────┬──────────┬─────────┬─────────┐ |
||
│PATIENT_ID│LASTNAME│LAST_VISIT│SCORE_SUM│SCORE_AVG│ |
│PATIENT_ID│LASTNAME│LAST_VISIT│SCORE_SUM│SCORE_AVG│ |
||
Line 916: | Line 916: | ||
│4004 │Wirth │2020-11-05│15.4 │7.7 │ |
│4004 │Wirth │2020-11-05│15.4 │7.7 │ |
||
│5005 │Kurtz │ │ │ │ |
│5005 │Kurtz │ │ │ │ |
||
└──────────┴────────┴──────────┴─────────┴─────────┘</ |
└──────────┴────────┴──────────┴─────────┴─────────┘</syntaxhighlight> |
||
If the empty score in visits was a display concern, we might instead do it this way: |
If the empty score in visits was a display concern, we might instead do it this way: |
||
< |
<syntaxhighlight lang="j">task=: {{ |
||
P=. <@:>"1|:/:~}.patients |
P=. <@:>"1|:/:~}.patients |
||
V=. <@:>"1|:/:~}.visits |
V=. <@:>"1|:/:~}.visits |
||
Line 936: | Line 936: | ||
labels=. ;:'PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG' |
labels=. ;:'PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG' |
||
labels,:id;nm;vdt;sum;avg |
labels,:id;nm;vdt;sum;avg |
||
}}</ |
}}</syntaxhighlight> |
||
Which gives us: |
Which gives us: |
||
< |
<syntaxhighlight lang="j"> task'' |
||
┌──────────┬────────┬──────────┬─────────┬─────────┐ |
┌──────────┬────────┬──────────┬─────────┬─────────┐ |
||
│PATIENT_ID│LASTNAME│LAST_VISIT│SCORE_SUM│SCORE_AVG│ |
│PATIENT_ID│LASTNAME│LAST_VISIT│SCORE_SUM│SCORE_AVG│ |
||
Line 949: | Line 949: | ||
│4004 │Wirth │2020-11-05│15.4 │7.7 │ |
│4004 │Wirth │2020-11-05│15.4 │7.7 │ |
||
│5005 │Kurtz │ │ │ │ |
│5005 │Kurtz │ │ │ │ |
||
└──────────┴────────┴──────────┴─────────┴─────────┘</ |
└──────────┴────────┴──────────┴─────────┴─────────┘</syntaxhighlight> |
||
=={{header|jq}}== |
=={{header|jq}}== |
||
Line 960: | Line 960: | ||
'''Ingesting CSV data''' |
'''Ingesting CSV data''' |
||
<syntaxhighlight lang="jq"> |
|||
<lang jq> |
|||
# objectify/1 takes an array of atomic values as inputs, and packages |
# objectify/1 takes an array of atomic values as inputs, and packages |
||
# these into an object with keys specified by the "headers" array and |
# these into an object with keys specified by the "headers" array and |
||
Line 978: | Line 978: | ||
([]; . + [ $row|objectify($headers) ]); |
([]; . + [ $row|objectify($headers) ]); |
||
</syntaxhighlight> |
|||
</lang> |
|||
'''Aggregation functions''' |
'''Aggregation functions''' |
||
<syntaxhighlight lang="jq"> |
|||
<lang jq> |
|||
# output {LAST_VISIT} |
# output {LAST_VISIT} |
||
def LAST_VISIT($patient_id): |
def LAST_VISIT($patient_id): |
||
Line 990: | Line 990: | ||
| {SCORE_SUM: add, count: length} |
| {SCORE_SUM: add, count: length} |
||
| {SCORE_SUM, SCORE_AVG: (if .SCORE_SUM and .count > 0 then .SCORE_SUM/.count else null end)}; |
| {SCORE_SUM, SCORE_AVG: (if .SCORE_SUM and .count > 0 then .SCORE_SUM/.count else null end)}; |
||
</ |
</syntaxhighlight>'''The task'''<syntaxhighlight lang="jq"> |
||
# Read the two tables: |
# Read the two tables: |
||
INDEX($patients | [splits("\n")] | map(split(",")) | csv2jsonHelper[]; .PATIENT_ID) as $patients |
INDEX($patients | [splits("\n")] | map(split(",")) | csv2jsonHelper[]; .PATIENT_ID) as $patients |
||
Line 1,001: | Line 1,001: | ||
# ... but display it as a sequence of JSON objects |
# ... but display it as a sequence of JSON objects |
||
| .[] |
| .[] |
||
</syntaxhighlight> |
|||
</lang> |
|||
'''Invocation''' |
'''Invocation''' |
||
Line 1,019: | Line 1,019: | ||
=={{header|Julia}}== |
=={{header|Julia}}== |
||
{{trans|Python}} |
{{trans|Python}} |
||
< |
<syntaxhighlight lang="julia">using CSV, DataFrames, Statistics |
||
# load data from csv files |
# load data from csv files |
||
Line 1,058: | Line 1,058: | ||
end |
end |
||
println(df_result) |
println(df_result) |
||
</ |
</syntaxhighlight>{{out}} |
||
<pre> |
<pre> |
||
5×5 DataFrame |
5×5 DataFrame |
||
Line 1,072: | Line 1,072: | ||
=={{header|Mathematica}}/{{header|Wolfram Language}}== |
=={{header|Mathematica}}/{{header|Wolfram Language}}== |
||
< |
<syntaxhighlight lang="mathematica">a = ImportString["PATIENT_ID,LASTNAME |
||
1001,Hopper |
1001,Hopper |
||
4004,Wirth |
4004,Wirth |
||
Line 1,096: | Line 1,096: | ||
"SCORE_SUM" -> If[DeleteMissing@#[[All, "SCORE"]] =!= {}, Total@DeleteMissing@#[[All, "SCORE"]], Missing[]], |
"SCORE_SUM" -> If[DeleteMissing@#[[All, "SCORE"]] =!= {}, Total@DeleteMissing@#[[All, "SCORE"]], Missing[]], |
||
"SCORE_AVG" -> If[DeleteMissing@#[[All, "SCORE"]] =!= {}, Mean@DeleteMissing@#[[All, "SCORE"]], Missing[]]|> & /@ |
"SCORE_AVG" -> If[DeleteMissing@#[[All, "SCORE"]] =!= {}, Mean@DeleteMissing@#[[All, "SCORE"]], Missing[]]|> & /@ |
||
gr // Dataset</ |
gr // Dataset</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> PATIENT_ID LASTNAME VISIT_DATE SCORE_SUM SCORE_AVG |
<pre> PATIENT_ID LASTNAME VISIT_DATE SCORE_SUM SCORE_AVG |
||
Line 1,105: | Line 1,105: | ||
5005 5005 Kurtz \[LongDash] \[LongDash] \[LongDash]</pre> |
5005 5005 Kurtz \[LongDash] \[LongDash] \[LongDash]</pre> |
||
=={{header|Mercury}}== |
=={{header|Mercury}}== |
||
< |
<syntaxhighlight lang="prolog">:- module rosetta. |
||
:- interface. |
:- interface. |
||
Line 1,174: | Line 1,174: | ||
(if MaxDate1 = date(0,0,0) then MaxDate = no else MaxDate = MaxDate1)), |
(if MaxDate1 = date(0,0,0) then MaxDate = no else MaxDate = MaxDate1)), |
||
Solutions), |
Solutions), |
||
foldl(io.write_line, Solutions, !IO).</ |
foldl(io.write_line, Solutions, !IO).</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>{Id, Lastname, SumScores, AvgScores, MaxDate}: |
<pre>{Id, Lastname, SumScores, AvgScores, MaxDate}: |
||
Line 1,185: | Line 1,185: | ||
=={{header|Nim}}== |
=={{header|Nim}}== |
||
===CSV files and tables=== |
===CSV files and tables=== |
||
< |
<syntaxhighlight lang="nim">import algorithm, parsecsv, strformat, strutils, tables |
||
const NoValue = -1.0 |
const NoValue = -1.0 |
||
Line 1,237: | Line 1,237: | ||
let scoreSum = if count == 0: "" else: &"{sum:>4.1f}" |
let scoreSum = if count == 0: "" else: &"{sum:>4.1f}" |
||
let scoreAvg = if count == 0: "" else: &"{sum / count.toFloat: >4.2f}" |
let scoreAvg = if count == 0: "" else: &"{sum / count.toFloat: >4.2f}" |
||
echo &"| {id:^10} | {name:^10} | {lastVisit:^10} | {scoreSum:>7} | {scoreAvg:>6} |"</ |
echo &"| {id:^10} | {name:^10} | {lastVisit:^10} | {scoreSum:>7} | {scoreAvg:>6} |"</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 1,250: | Line 1,250: | ||
{{trans|Python}} |
{{trans|Python}} |
||
We use the high level standard library "db_sqlite" rather than the low level one "sqlite3". |
We use the high level standard library "db_sqlite" rather than the low level one "sqlite3". |
||
< |
<syntaxhighlight lang="nim">import parseCsv, db_sqlite, sequtils, strutils |
||
const FNames = ["patients1.csv", "patients2.csv"] |
const FNames = ["patients1.csv", "patients2.csv"] |
||
Line 1,302: | Line 1,302: | ||
echo "| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |" |
echo "| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG |" |
||
for row in result: |
for row in result: |
||
echo "| " & row.mapit(it.center(10)).join(" | ") & '|'</ |
echo "| " & row.mapit(it.center(10)).join(" | ") & '|'</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 1,313: | Line 1,313: | ||
=={{header|Phix}}== |
=={{header|Phix}}== |
||
<!--< |
<!--<syntaxhighlight lang="phix">(phixonline)--> |
||
<span style="color: #008080;">constant</span> <span style="color: #000000;">patients_txt</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">split</span><span style="color: #0000FF;">(</span><span style="color: #008000;">""" |
<span style="color: #008080;">constant</span> <span style="color: #000000;">patients_txt</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">split</span><span style="color: #0000FF;">(</span><span style="color: #008000;">""" |
||
PATIENT_ID,LASTNAME |
PATIENT_ID,LASTNAME |
||
Line 1,371: | Line 1,371: | ||
<span style="color: #0000FF;">{</span><span style="color: #7060A8;">id</span><span style="color: #0000FF;">,</span><span style="color: #000000;">name</span><span style="color: #0000FF;">,</span><span style="color: #000000;">dt</span><span style="color: #0000FF;">,</span><span style="color: #000000;">scstr</span><span style="color: #0000FF;">,</span><span style="color: #000000;">avstr</span><span style="color: #0000FF;">})</span> |
<span style="color: #0000FF;">{</span><span style="color: #7060A8;">id</span><span style="color: #0000FF;">,</span><span style="color: #000000;">name</span><span style="color: #0000FF;">,</span><span style="color: #000000;">dt</span><span style="color: #0000FF;">,</span><span style="color: #000000;">scstr</span><span style="color: #0000FF;">,</span><span style="color: #000000;">avstr</span><span style="color: #0000FF;">})</span> |
||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
||
<!--</ |
<!--</syntaxhighlight>--> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 1,385: | Line 1,385: | ||
Weird requirement: "Use the most common libraries only when built-in functionality is not sufficient." |
Weird requirement: "Use the most common libraries only when built-in functionality is not sufficient." |
||
Not even a "use strict;" :) |
Not even a "use strict;" :) |
||
< |
<syntaxhighlight lang="perl">#!/usr/bin/perl |
||
my $fmt = '| %-11s' x 5 . "|\n"; |
my $fmt = '| %-11s' x 5 . "|\n"; |
||
printf $fmt, qw( PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG); |
printf $fmt, qw( PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG); |
||
Line 1,421: | Line 1,421: | ||
3003,2020-11-12, |
3003,2020-11-12, |
||
4004,2020-11-05,7.0 |
4004,2020-11-05,7.0 |
||
1001,2020-11-19,5.3</ |
1001,2020-11-19,5.3</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 1,433: | Line 1,433: | ||
=={{header|Prolog}}== |
=={{header|Prolog}}== |
||
Implemented using SWI Prolog: |
Implemented using SWI Prolog: |
||
< |
<syntaxhighlight lang="prolog">patient(1001,'Hopper'). |
||
patient(4004,'Wirth'). |
patient(4004,'Wirth'). |
||
patient(3003,'Kemeny'). |
patient(3003,'Kemeny'). |
||
Line 1,468: | Line 1,468: | ||
summaryScores(Id, Lastname, ScoreSum, ScoreMean), |
summaryScores(Id, Lastname, ScoreSum, ScoreMean), |
||
writeln(summaryScores(Id, Lastname, ScoreSum, ScoreMean)), |
writeln(summaryScores(Id, Lastname, ScoreSum, ScoreMean)), |
||
fail.</ |
fail.</syntaxhighlight> |
||
{{output}} |
{{output}} |
||
<pre>summaryDates(1001,Hopper,2020-11-19) |
<pre>summaryDates(1001,Hopper,2020-11-19) |
||
Line 1,479: | Line 1,479: | ||
false.</pre> |
false.</pre> |
||
Implemented using XSB Prolog (which allows for user-defined aggregates): |
Implemented using XSB Prolog (which allows for user-defined aggregates): |
||
< |
<syntaxhighlight lang="prolog">:- import bagMax/2, bagCount/2, bagSum/2, bagReduce/4 from aggregs. |
||
:- import julian_date/7, date_string/3 from iso8601. |
:- import julian_date/7, date_string/3 from iso8601. |
||
:- import load_csv/2, add_cvt_type_hook/2 from proc_files. |
:- import load_csv/2, add_cvt_type_hook/2 from proc_files. |
||
Line 1,527: | Line 1,527: | ||
sum(X,Y,Z) :- Z is X+Y. |
sum(X,Y,Z) :- Z is X+Y. |
||
:- hilog successor. |
:- hilog successor. |
||
successor(X,_Y,Z) :- Z is X+1.</ |
successor(X,_Y,Z) :- Z is X+1.</syntaxhighlight> |
||
=={{header|PureBasic}}== |
=={{header|PureBasic}}== |
||
< |
<syntaxhighlight lang="purebasic">Structure Person |
||
Name$ |
Name$ |
||
EndStructure |
EndStructure |
||
Line 1,590: | Line 1,590: | ||
PrintN("") : scs=0 : c=0 |
PrintN("") : scs=0 : c=0 |
||
Next |
Next |
||
Input()</ |
Input()</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG | |
<pre>| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG | |
||
Line 1,602: | Line 1,602: | ||
===Python: Using pandas library=== |
===Python: Using pandas library=== |
||
< |
<syntaxhighlight lang="python"># to install pandas library go to cmd prompt and type: |
||
# cd %USERPROFILE%\AppData\Local\Programs\Python\Python38-32\Scripts\ |
# cd %USERPROFILE%\AppData\Local\Programs\Python\Python38-32\Scripts\ |
||
# pip install pandas |
# pip install pandas |
||
Line 1,646: | Line 1,646: | ||
df_result = df_group.agg({'VISIT_DATE': 'max', 'SCORE': [lambda x: x.sum(min_count=1),'mean']}) |
df_result = df_group.agg({'VISIT_DATE': 'max', 'SCORE': [lambda x: x.sum(min_count=1),'mean']}) |
||
print(df_result)</ |
print(df_result)</syntaxhighlight> |
||
<pre> |
<pre> |
||
PATIENT_ID LASTNAME LAST_VISIT SCORE |
PATIENT_ID LASTNAME LAST_VISIT SCORE |
||
Line 1,660: | Line 1,660: | ||
Using only standard libraries and input from csv files. |
Using only standard libraries and input from csv files. |
||
< |
<syntaxhighlight lang="python">import csv |
||
fnames = 'patients.csv patients_visits.csv'.split() |
fnames = 'patients.csv patients_visits.csv'.split() |
||
Line 1,695: | Line 1,695: | ||
#%% |
#%% |
||
for record in result: |
for record in result: |
||
print(f"| {' | '.join(f'{r:^10}' for r in record)} |")</ |
print(f"| {' | '.join(f'{r:^10}' for r in record)} |")</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 1,710: | Line 1,710: | ||
(The SQL SELECT statement is modelled on that of the SQL entry elsewhere on this page). |
(The SQL SELECT statement is modelled on that of the SQL entry elsewhere on this page). |
||
< |
<syntaxhighlight lang="python">import sqlite3 |
||
import csv |
import csv |
||
Line 1,774: | Line 1,774: | ||
result = join_tables_and_group(conn) |
result = join_tables_and_group(conn) |
||
for record in result: |
for record in result: |
||
print(f"| {' | '.join(f'{str(r):^10}' for r in record)} |")</ |
print(f"| {' | '.join(f'{str(r):^10}' for r in record)} |")</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 1,785: | Line 1,785: | ||
=={{header|R}}== |
=={{header|R}}== |
||
< |
<syntaxhighlight lang="r"># load data from csv files |
||
# setwd("C:\Temp\csv\") |
# setwd("C:\Temp\csv\") |
||
# df_patient <- read.csv(file="patients.csv", header = TRUE, sep = ",") |
# df_patient <- read.csv(file="patients.csv", header = TRUE, sep = ",") |
||
Line 1,826: | Line 1,826: | ||
df_result <- merge(df_patient, df_agg, by = 'PATIENT_ID', all.x = TRUE) |
df_result <- merge(df_patient, df_agg, by = 'PATIENT_ID', all.x = TRUE) |
||
print(df_result)</ |
print(df_result)</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 1,838: | Line 1,838: | ||
=={{header|Raku}}== |
=={{header|Raku}}== |
||
<lang |
<syntaxhighlight lang="raku" line>my @names = map { ( <PATIENT_ID LASTNAME> Z=> .list ).hash }, |
||
( 1001, 'Hopper' ), |
( 1001, 'Hopper' ), |
||
( 4004, 'Wirth' ), |
( 4004, 'Wirth' ), |
||
Line 1,874: | Line 1,874: | ||
my @out_field_names = <PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG>; |
my @out_field_names = <PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG>; |
||
my @rows = @result.sort(*.<PATIENT_ID>).map(*.{@out_field_names}); |
my @rows = @result.sort(*.<PATIENT_ID>).map(*.{@out_field_names}); |
||
say .map({$_ // ''}).fmt('%-10s', ' | ') for @out_field_names, |@rows;</ |
say .map({$_ // ''}).fmt('%-10s', ' | ') for @out_field_names, |@rows;</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 1,886: | Line 1,886: | ||
=={{header|REXX}}== |
=={{header|REXX}}== |
||
< |
<syntaxhighlight lang="rexx">/* REXX */ |
||
patients='patients.csv' |
patients='patients.csv' |
||
l=linein(patients) |
l=linein(patients) |
||
Line 1,966: | Line 1,966: | ||
swl=swl wa.i |
swl=swl wa.i |
||
End |
End |
||
Return strip(swl)</ |
Return strip(swl)</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG | |
<pre>| PATIENT_ID | LASTNAME | LAST_VISIT | SCORE_SUM | SCORE_AVG | |
||
Line 1,975: | Line 1,975: | ||
| 5005 | Kurtz | | | |</pre> |
| 5005 | Kurtz | | | |</pre> |
||
=={{header|SAS}}== |
=={{header|SAS}}== |
||
< |
<syntaxhighlight lang="sas"> %let datefmt=E8601DA10.; |
||
data patient; |
data patient; |
||
infile "patient.csv" dsd dlm=','; |
infile "patient.csv" dsd dlm=','; |
||
Line 1,994: | Line 1,994: | ||
avg(score) as avg_score from visit group by id) |
avg(score) as avg_score from visit group by id) |
||
natural right join patient |
natural right join patient |
||
order by id;</ |
order by id;</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>: id lastname max_date sum_score avg_score |
<pre>: id lastname max_date sum_score avg_score |
||
Line 2,008: | Line 2,008: | ||
Note: the PATIENT_ID 5005 is missing from the result, because MATCH FILES cannot add rows for missing keys, i.e. AFAIK a "left join" is not possible is SPSS. |
Note: the PATIENT_ID 5005 is missing from the result, because MATCH FILES cannot add rows for missing keys, i.e. AFAIK a "left join" is not possible is SPSS. |
||
< |
<syntaxhighlight lang="spss">* set working directory to location of .csv files |
||
CD 'C:\Temp\csv\'. |
CD 'C:\Temp\csv\'. |
||
Line 2,073: | Line 2,073: | ||
/last_visit = MAX(VISIT_DATE) |
/last_visit = MAX(VISIT_DATE) |
||
/score_avg = MEAN(SCORE) |
/score_avg = MEAN(SCORE) |
||
/score_sum = SUM(SCORE).</ |
/score_sum = SUM(SCORE).</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 2,086: | Line 2,086: | ||
=={{header|SQL}}== |
=={{header|SQL}}== |
||
< |
<syntaxhighlight lang="sql">-- drop tables |
||
DROP TABLE IF EXISTS tmp_patients; |
DROP TABLE IF EXISTS tmp_patients; |
||
DROP TABLE IF EXISTS tmp_visits; |
DROP TABLE IF EXISTS tmp_visits; |
||
Line 2,144: | Line 2,144: | ||
p.LASTNAME |
p.LASTNAME |
||
ORDER BY |
ORDER BY |
||
p.PATIENT_ID;</ |
p.PATIENT_ID;</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 2,156: | Line 2,156: | ||
=={{header|Transd}}== |
=={{header|Transd}}== |
||
< |
<syntaxhighlight lang="scheme">#lang transd |
||
MainModule: { |
MainModule: { |
||
Line 2,214: | Line 2,214: | ||
)) |
)) |
||
)) |
)) |
||
}</ |
}</syntaxhighlight>{{out}} |
||
<pre> |
<pre> |
||
PATIENT_ID| LASTNAME|VISIT_DATE| SCORE| SCORE_AVG| |
PATIENT_ID| LASTNAME|VISIT_DATE| SCORE| SCORE_AVG| |
||
Line 2,225: | Line 2,225: | ||
=={{header|TutorialD}}== |
=={{header|TutorialD}}== |
||
<lang>BEGIN; |
<syntaxhighlight lang="text">BEGIN; |
||
TYPE Date UNION; |
TYPE Date UNION; |
||
TYPE DateValid IS {Date POSSREP {year INTEGER, month INTEGER, day INTEGER}}; |
TYPE DateValid IS {Date POSSREP {year INTEGER, month INTEGER, day INTEGER}}; |
||
Line 2,260: | Line 2,260: | ||
(SUMMARIZE visit BY {id}: {maxdate := MAX(date)} UNION |
(SUMMARIZE visit BY {id}: {maxdate := MAX(date)} UNION |
||
(EXTEND (patient {id} MINUS (visit {id})): {maxdate := DateUnknown()})) JOIN |
(EXTEND (patient {id} MINUS (visit {id})): {maxdate := DateUnknown()})) JOIN |
||
patient</ |
patient</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 2,273: | Line 2,273: | ||
{{libheader|Wren-sort}} |
{{libheader|Wren-sort}} |
||
{{libheader|Wren-fmt}} |
{{libheader|Wren-fmt}} |
||
< |
<syntaxhighlight lang="ecmascript">import "/fmt" for Fmt |
||
import "/sort" for Sort |
import "/sort" for Sort |
||
Line 2,368: | Line 2,368: | ||
var merges = Patient.ids.map { |id| Merge.new(id) }.toList |
var merges = Patient.ids.map { |id| Merge.new(id) }.toList |
||
Merge.print(merges)</ |
Merge.print(merges)</syntaxhighlight> |
||
{{out}} |
{{out}} |