Bin given limits: Difference between revisions

From Rosetta Code
Content added Content deleted
m (Formatting.)
Line 54: Line 54:


Show output here, on this page.
Show output here, on this page.

=={{header|Phix}}==
<lang Phix>function bin_it(sequence limits, data)
-- Bin data according to (ascending) limits.
sequence bins = repeat(0,length(limits)+1) -- adds under/over range bins too
for i = 1 to length(data) do
integer bdx = binary_search(data[i],limits)
bins[abs(bdx)+(bdx>0)] += 1
end for
return bins
end function

procedure bin_print(sequence limits, bins)
printf(1," < %3d := %3d\n",{limits[1],bins[1]})
for i=2 to length(limits) do
printf(1,">= %3d .. < %3d := %3d\n",{limits[i-1],limits[i],bins[i]})
end for
printf(1,">= %3d := %3d\n",{limits[$],bins[$]})
end procedure
sequence limits, data, bins
printf(1,"RC FIRST EXAMPLE\n")
limits = {23, 37, 43, 53, 67, 83}
data = {95,21,94,12,99,4,70,75,83,93,52,80,57,5,53,86,65,17,92,83,71,61,54,58,47,
16,8,9,32,84,7,87,46,19,30,37,96,6,98,40,79,97,45,64,60,29,49,36,43,55}
bins = bin_it(limits, data)
bin_print(limits, bins)

printf(1,"\nRC SECOND EXAMPLE\n")
limits = {14, 18, 249, 312, 389, 392, 513, 591, 634, 720}
data = {
445,814,519,697,700,130,255,889,481,122,
932, 77,323,525,570,219,367,523,442,933,
416,589,930,373,202,253,775, 47,731,685,
293,126,133,450,545,100,741,583,763,306,
655,267,248,477,549,238, 62,678, 98,534,
622,907,406,714,184,391,913, 42,560,247,
346,860, 56,138,546, 38,985,948, 58,213,
799,319,390,634,458,945,733,507,916,123,
345,110,720,917,313,845,426, 9,457,628,
410,723,354,895,881,953,677,137,397, 97,
854,740, 83,216,421, 94,517,479,292,963,
376,981,480, 39,257,272,157, 5,316,395,
787,942,456,242,759,898,576, 67,298,425,
894,435,831,241,989,614,987,770,384,692,
698,765,331,487,251,600,879,342,982,527,
736,795,585, 40, 54,901,408,359,577,237,
605,847,353,968,832,205,838,427,876,959,
686,646,835,127,621,892,443,198,988,791,
466, 23,707,467, 33,670,921,180,991,396,
160,436,717,918, 8,374,101,684,727,749
}
bins = bin_it(limits, data)
bin_print(limits, bins)</lang>
{{out}}
<pre>
RC FIRST EXAMPLE
< 23 := 11
>= 23 .. < 37 := 4
>= 37 .. < 43 := 2
>= 43 .. < 53 := 6
>= 53 .. < 67 := 9
>= 67 .. < 83 := 5
>= 83 := 13

RC SECOND EXAMPLE
< 14 := 3
>= 14 .. < 18 := 0
>= 18 .. < 249 := 44
>= 249 .. < 312 := 10
>= 312 .. < 389 := 16
>= 389 .. < 392 := 2
>= 392 .. < 513 := 28
>= 513 .. < 591 := 16
>= 591 .. < 634 := 6
>= 634 .. < 720 := 16
>= 720 := 59
</pre>


=={{header|Python}}==
=={{header|Python}}==

Revision as of 15:42, 6 February 2021

Bin given limits is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

You are given a list of n ascending, unique numbers which are to form limits for n+1 bins which count how many of a large set of input numbers fall in the range of each bin.

(Assuming zero-based indexing)

   bin[0] counts how many inputs are < limit[0]
   bin[1] counts how many inputs are >= limit[0] and < limit[1]
   ..
   bin[n-1] counts how many inputs are >= limit[n-2] and < limit[n-1]
   bin[n] counts how many inputs are > limit[n-1]
Task

The task is to create a function that given the ascending limits and a stream/ list of numbers, will return the bins; together with another function that given the same list of limits and the binning will print the limit of each bin together with the count of items that fell in the range.

Assume the numbers to bin are too large to practically sort.

Task examples

Part 1: Bin using the following limits the given input data

   limits  = [23, 37, 43, 53, 67, 83]
   data = [95,21,94,12,99,4,70,75,83,93,52,80,57,5,53,86,65,17,92,83,71,61,54,58,47,
           16,8,9,32,84,7,87,46,19,30,37,96,6,98,40,79,97,45,64,60,29,49,36,43,55]

Part 2: Bin using the following limits the given input data

   limits = [14, 18, 249, 312, 389, 392, 513, 591, 634, 720]
   data = [
       445,814,519,697,700,130,255,889,481,122,
       932, 77,323,525,570,219,367,523,442,933,
       416,589,930,373,202,253,775, 47,731,685,
       293,126,133,450,545,100,741,583,763,306,
       655,267,248,477,549,238, 62,678, 98,534,
       622,907,406,714,184,391,913, 42,560,247,
       346,860, 56,138,546, 38,985,948, 58,213,
       799,319,390,634,458,945,733,507,916,123,
       345,110,720,917,313,845,426,  9,457,628,
       410,723,354,895,881,953,677,137,397, 97,
       854,740, 83,216,421, 94,517,479,292,963,
       376,981,480, 39,257,272,157,  5,316,395,
       787,942,456,242,759,898,576, 67,298,425,
       894,435,831,241,989,614,987,770,384,692,
       698,765,331,487,251,600,879,342,982,527,
       736,795,585, 40, 54,901,408,359,577,237,
       605,847,353,968,832,205,838,427,876,959,
       686,646,835,127,621,892,443,198,988,791,
       466, 23,707,467, 33,670,921,180,991,396,
       160,436,717,918,  8,374,101,684,727,749
           ]

Show output here, on this page.

Phix

<lang Phix>function bin_it(sequence limits, data)

   -- Bin data according to (ascending) limits.
   sequence bins = repeat(0,length(limits)+1)  -- adds under/over range bins too
   for i = 1 to length(data) do
       integer bdx = binary_search(data[i],limits)
       bins[abs(bdx)+(bdx>0)] += 1
   end for
   return bins

end function

procedure bin_print(sequence limits, bins)

   printf(1,"          < %3d := %3d\n",{limits[1],bins[1]})
   for i=2 to length(limits) do
       printf(1,">= %3d .. < %3d := %3d\n",{limits[i-1],limits[i],bins[i]})
   end for
   printf(1,">= %3d          := %3d\n",{limits[$],bins[$]})

end procedure

sequence limits, data, bins printf(1,"RC FIRST EXAMPLE\n") limits = {23, 37, 43, 53, 67, 83} data = {95,21,94,12,99,4,70,75,83,93,52,80,57,5,53,86,65,17,92,83,71,61,54,58,47,

       16,8,9,32,84,7,87,46,19,30,37,96,6,98,40,79,97,45,64,60,29,49,36,43,55}

bins = bin_it(limits, data) bin_print(limits, bins)

printf(1,"\nRC SECOND EXAMPLE\n") limits = {14, 18, 249, 312, 389, 392, 513, 591, 634, 720} data = {

   445,814,519,697,700,130,255,889,481,122,
   932, 77,323,525,570,219,367,523,442,933,
   416,589,930,373,202,253,775, 47,731,685,
   293,126,133,450,545,100,741,583,763,306,
   655,267,248,477,549,238, 62,678, 98,534,
   622,907,406,714,184,391,913, 42,560,247,
   346,860, 56,138,546, 38,985,948, 58,213,
   799,319,390,634,458,945,733,507,916,123,
   345,110,720,917,313,845,426,  9,457,628,
   410,723,354,895,881,953,677,137,397, 97,
   854,740, 83,216,421, 94,517,479,292,963,
   376,981,480, 39,257,272,157,  5,316,395,
   787,942,456,242,759,898,576, 67,298,425,
   894,435,831,241,989,614,987,770,384,692,
   698,765,331,487,251,600,879,342,982,527,
   736,795,585, 40, 54,901,408,359,577,237,
   605,847,353,968,832,205,838,427,876,959,
   686,646,835,127,621,892,443,198,988,791,
   466, 23,707,467, 33,670,921,180,991,396,
   160,436,717,918,  8,374,101,684,727,749
       }

bins = bin_it(limits, data) bin_print(limits, bins)</lang>

Output:
RC FIRST EXAMPLE
          <  23 :=  11
>=  23 .. <  37 :=   4
>=  37 .. <  43 :=   2
>=  43 .. <  53 :=   6
>=  53 .. <  67 :=   9
>=  67 .. <  83 :=   5
>=  83          :=  13

RC SECOND EXAMPLE
          <  14 :=   3
>=  14 .. <  18 :=   0
>=  18 .. < 249 :=  44
>= 249 .. < 312 :=  10
>= 312 .. < 389 :=  16
>= 389 .. < 392 :=   2
>= 392 .. < 513 :=  28
>= 513 .. < 591 :=  16
>= 591 .. < 634 :=   6
>= 634 .. < 720 :=  16
>= 720          :=  59

Python

This example uses binary search through the limits to assign each number to its bin, via standard module bisect.bisect_right.
The Counter module is not used as the number of bins is known allowing faster array access for incrementing bin counts versus dict lookup.

<lang python>from bisect import bisect_right

def bin_it(limits: list, data: list) -> list:

   "Bin data according to (ascending) limits."
   bins = [0] * (len(limits) + 1)      # adds under/over range bins too
   for d in data:
       bins[bisect_right(limits, d)] += 1
   return bins

def bin_print(limits: list, bins: list) -> list:

   print(f"          < {limits[0]:3} := {bins[0]:3}")
   for lo, hi, count in zip(limits, limits[1:], bins[1:]):
       print(f">= {lo:3} .. < {hi:3} := {count:3}")
   print(f">= {limits[-1]:3}          := {bins[-1]:3}")


if __name__ == "__main__":

   print("RC FIRST EXAMPLE\n")
   limits  = [23, 37, 43, 53, 67, 83]
   data = [95,21,94,12,99,4,70,75,83,93,52,80,57,5,53,86,65,17,92,83,71,61,54,58,47,
           16,8,9,32,84,7,87,46,19,30,37,96,6,98,40,79,97,45,64,60,29,49,36,43,55]
   bins = bin_it(limits, data)
   bin_print(limits, bins)
   print("\nRC SECOND EXAMPLE\n")
   limits = [14, 18, 249, 312, 389, 392, 513, 591, 634, 720]
   data = [
       445,814,519,697,700,130,255,889,481,122,
       932, 77,323,525,570,219,367,523,442,933,
       416,589,930,373,202,253,775, 47,731,685,
       293,126,133,450,545,100,741,583,763,306,
       655,267,248,477,549,238, 62,678, 98,534,
       622,907,406,714,184,391,913, 42,560,247,
       346,860, 56,138,546, 38,985,948, 58,213,
       799,319,390,634,458,945,733,507,916,123,
       345,110,720,917,313,845,426,  9,457,628,
       410,723,354,895,881,953,677,137,397, 97,
       854,740, 83,216,421, 94,517,479,292,963,
       376,981,480, 39,257,272,157,  5,316,395,
       787,942,456,242,759,898,576, 67,298,425,
       894,435,831,241,989,614,987,770,384,692,
       698,765,331,487,251,600,879,342,982,527,
       736,795,585, 40, 54,901,408,359,577,237,
       605,847,353,968,832,205,838,427,876,959,
       686,646,835,127,621,892,443,198,988,791,
       466, 23,707,467, 33,670,921,180,991,396,
       160,436,717,918,  8,374,101,684,727,749
           ]
   bins = bin_it(limits, data)
   bin_print(limits, bins)</lang>
Output:
RC FIRST EXAMPLE

          <  23 :=  11
>=  23 .. <  37 :=   4
>=  37 .. <  43 :=   2
>=  43 .. <  53 :=   6
>=  53 .. <  67 :=   9
>=  67 .. <  83 :=   5
>=  83          :=  13

RC SECOND EXAMPLE

          <  14 :=   3
>=  14 .. <  18 :=   0
>=  18 .. < 249 :=  44
>= 249 .. < 312 :=  10
>= 312 .. < 389 :=  16
>= 389 .. < 392 :=   2
>= 392 .. < 513 :=  28
>= 513 .. < 591 :=  16
>= 591 .. < 634 :=   6
>= 634 .. < 720 :=  16
>= 720          :=  59