Averages/Mode

From Rosetta Code
Task
Averages/Mode
You are encouraged to solve this task according to the task description, using any language you may know.

Write a program to find the mode value of a collection. The case where the collection is empty may be ignored. Care must be taken to handle the case where the mode is non-unique.

If it is not appropriate or possible to support a general collection, use a vector (array), if possible. If it is not appropriate or possible to support an unspecified value type, use integers.

See also: Mean, Median

AutoHotkey

Search autohotkey.com: [1]

Source: AutoHotkey forum by Laszlo <lang autohotkey>MsgBox % Mode("1 2 3") MsgBox % Mode("1 2 0 3 0.0") MsgBox % Mode("0.1 2.2 -0.1 0.22e1 2.20 0.1")

Mode(a, d=" ") { ; the number that occurs most frequently in a list delimited by d (space)

  Sort a, ND%d% 
  Loop Parse, a, %d% 
     If (V != A_LoopField) { 
        If (Ct > MxCt) 
           MxV := V, MxCt := Ct 
        V := A_LoopField, Ct := 1 
     } 
     Else Ct++ 
  Return Ct>MxCt ? V : MxV 

}</lang>

C

Here we use an array of double (easily we could change it to handle integers or any other basic type). <lang c>#include <stdio.h>

  1. include <stdlib.h>
  2. include <stdbool.h>
  1. define INC_VAL 50

// to hold the results typedef struct stack {

 double *data;
 size_t n;
 size_t cap;

} stack_t;

// to hold values and their frequencies typedef struct fav_el {

 double v;
 size_t f;

} fav_el_t;

typedef struct freqs_and_vals_table {

 fav_el_t *vf;
 size_t n;
 size_t cap;

} fav_t;</lang>

<lang c>fav_t *alloc_table() {

 fav_t *r = malloc(sizeof(fav_t));
 r->n = 0;
 r->cap = 1;
 r->vf = malloc(sizeof(fav_el_t)*INC_VAL);
 return r;

}

void free_table(fav_t *t) {

 free(t->vf);
 free(t);

}

// compare functions for bsearch and qsort

  1. define CMP_HOOK(F) ((int (*)(const void *, const void *))F)

int t_cmp(double *k, double *v) {

 if ( *k < *v )
   return -1;
 else if ( *k > *v )
   return 1;
 else
   return 0;

}

int s_cmp(fav_el_t *a, fav_el_t *b) {

 return t_cmp(&a->v, &b->v);

}

int f_cmp(fav_el_t *a, fav_el_t *b) {

 if ( a->f < b->f )
   return 1;
 else if ( a->f > b->f )
   return -1;
 else
   return 0;

}

size_t table_search(fav_t *t, double v) {

 fav_el_t *el = bsearch(&v, t->vf, t->n, sizeof(fav_el_t), CMP_HOOK(s_cmp));
 return (el==NULL) ? -1 : (el - t->vf);

}

void add_value(fav_t *t, double v) {

 int i;
 if ( (i=table_search(t, v)) < 0 ) {
   if ( (t->n + 1) > (t->cap * INC_VAL) ) {
     t->cap++;
     t->vf = realloc(t->vf, t->cap * INC_VAL * sizeof(fav_el_t));
   }
   t->vf[t->n].v = v;
   t->vf[t->n].f = 1;
   t->n++;
 } else {
   t->vf[i].f++;
 }

}

void collect_from_array(fav_t *t, double *v, size_t n) {

 size_t i;
 qsort(v, n, sizeof(double), CMP_HOOK(t_cmp));
 for(i=0; i < n; i++) {
   add_value(t, v[i]);
 }

}

stack_t *init_stack() {

 stack_t *r = malloc(sizeof(stack_t));
 r->n = 0;
 r->cap = 1;
 r->data = malloc(sizeof(double)*INC_VAL);
 return r;

}

void free_stack(stack_t *s) {

 free(s->data);
 free(s);

}

void push_value(stack_t *s, double v) {

 if ( (s->n + 1) > (s->cap * INC_VAL) ) {
   s->cap++;
   s->data = realloc(s->data, s->cap * INC_VAL * sizeof(double));
 }
 s->data[s->n] = v;
 s->n++;

}</lang>

All the codes above are utilities to keep this main function simple. First, we build a table of value-frequency, for each different value, using the utility function collect_from_array. Then we sort the value-frequency pair according to the frequency (descending, cfr. f_cmp), and push on a "stack" all the values with the biggest same frequency.

<lang c>stack_t *mode(double *v, size_t n) {

 size_t i, f;
 stack_t *r;
 fav_t *t1 = alloc_table();
 collect_from_array(t1, v, n);
 // now let's sort according to freqs
 qsort(t1->vf, t1->n, sizeof(fav_el_t), CMP_HOOK(f_cmp));
 r = init_stack();
 f = t1->vf[0].f;
 push_value(r, t1->vf[0].v);
 for(i=1; (i < t1->n) && (t1->vf[i].f == f); i++)
   push_value(r, t1->vf[i].v);
 free_table(t1);
 return r;

}</lang>

<lang c>double v1[] = { 1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17 }; double v2[] = { 1, 1, 2, 4, 4 };

  1. define PRINT_MODE(V) do { size_t i; \
   stack_t *r = mode(V, sizeof(V)/sizeof(double));	\
   for(i=0; i < r->n; i++) printf("%lf ", r->data[i]); \
   free_stack(r); printf("\n");			\
 } while(0);
 

int main() {

 PRINT_MODE(v1)
 PRINT_MODE(v2);
 return EXIT_SUCCESS;

}</lang>

C++

Works with: g++ version 4.3.2

<lang cpp>

  1. include <iterator>
  2. include <utility>
  3. include <algorithm>
  4. include <list>
  5. include <iostream>

// helper struct template<typename T> struct referring {

 referring(T const& t): value(t) {}
 template<typename Iter>
  bool operator()(std::pair<Iter, int> const& p)
 {
   return *p.first == value;
 }
 T const& value;

};

// requires: // FwdIterator is a ForwardIterator // The value_type of FwdIterator is EqualityComparable // OutIterator is an output iterator // the value_type of FwdIterator is convertible to the value_type of OutIterator // [first, last) is a valid range // provides: // the mode is written to result template<typename FwdIterator, typename OutIterator>

void mode(FwdIterator first, FwdIterator last, OutIterator result)

{

 typedef typename std::iterator_traits<FwdIterator>::value_type value_type;
 typedef std::list<std::pair<FwdIterator, int> > count_type;
 typedef typename count_type::iterator count_iterator;
 // count elements
 count_type counts;
 while (first != last)
 {
   count_iterator element = std::find_if(counts.begin(), counts.end(),
                                         referring<value_type>(*first));
   if (element == counts.end())
     counts.push_back(std::make_pair(first, 1));
   else
     ++element->second;
   ++first;
 }
 // find maximum
 int max = 0;
 for (count_iterator i = counts.begin(); i != counts.end(); ++i)
   if (i->second > max)
     max = i->second;
 // copy corresponding elements to output sequence
 for (count_iterator i = counts.begin(); i != counts.end(); ++i)
   if (i->second == max)
     *result++ = *i->first;

}

// example usage int main() {

 int values[] = { 1, 2, 3, 1, 2, 4, 2, 5, 2, 3, 3, 1, 3, 6 };
 median(values, values + sizeof(values)/sizeof(int),
        std::ostream_iterator<int>(std::cout, " "));
 std::cout << std::endl;

} </lang> Output:

2 3

Common Lisp

The following returns a list of the modes of a sequence as the primary value, and the frequency as the secondary value. E.g., (mode '(a b c d a b c a b)) produces (A B) and 3. hash-table-options can be used to customize the hash table, e.g., to specify the test by which elements are compared. <lang lisp>(defun mode (sequence &rest hash-table-options)

 (let ((frequencies (apply #'make-hash-table hash-table-options)))
   (map nil (lambda (element)
              (incf (gethash element frequencies 0)))
        sequence)
   (let ((modes '())
         (hifreq 0 ))
     (maphash (lambda (element frequency)
                (cond ((> frequency hifreq)
                       (setf hifreq frequency
                             modes  (list element)))
                      ((= frequency hifreq)
                       (push element modes))))
              frequencies)
     (values modes hifreq))))</lang>

D

This getMode function works for all primitive types and also for complex types that implement opCmp. <lang d> import std.stdio;

// returns an array containing the mode (array just in case there is more than one mode) T[]getMode(T)(T[]set) {

       int[T]aa;
       foreach(element;set) {
               if ((element in aa) is null) aa[element] = 0;
               aa[element]++;
       }
       int modenum = 0;
       T[]ret;
       foreach(key,value;aa) {
               if (value > modenum) {
                       modenum = value;
                       ret = [key];
               } else if (value == modenum) ret ~= key;
       }
       return ret;

}

int main() {

       int[]vals = [1, 2, 3, 1, 2, 4, 2, 5, 3, 3, 1, 3, 6];
       int[]ret = getMode(vals);
       foreach(mode;ret) writefln("Got mode %d for first set",mode);
       vals ~= 2;
       ret = getMode(vals);
       foreach(mode;ret) writefln("Got mode %d for second set",mode);
       return 0;

} </lang>

E

<lang e>pragma.enable("accumulator") def mode(values) {

   def counts := [].asMap().diverge()
   var maxCount := 0
   for v in values {
       maxCount max= (counts[v] := counts.fetch(v, fn{0}) + 1)
   }
   return accum [].asSet() for v => ==maxCount in counts { _.with(v) }

}</lang>

<lang e>? mode([1,1,2,2,3,3,4,4,4,5,5,6,6,7,8,8,9,9,0,0,0])

  1. value: [4, 0].asSet()</lang>

In the line "maxCount max= (counts[v] := counts.fetch(v, fn{0}) + 1)", max= is an update-assignment operation like +=. (The parentheses are unnecessary.) A more verbose version would be:

<lang e> def newCount := counts.fetch(v, fn { 0 }) + 1

 counts[v] := newCount
 maxCount := maxCount.max(newCount)</lang>
 

In for loops, each key and value from the collection are pattern matched against the specified key pattern => value pattern. In "for v => ==maxCount in counts", the == is a pattern-match operator which fails unless the value examined is equal to the specified value; so this selects only the input values (keys in counts) whose counts are equal to the maximum count.

Haskell

The following solutions require that the element type is an instance of Ord (i.e. comparable). <lang haskell>import Prelude (foldr, maximum, (==), (+)) import Data.Map (insertWith', empty, filter, elems, keys)

mode xs = keys (filter (== maximum (elems counts)) counts)

 where counts = foldr (\x -> insertWith' (+) x 1) empty xs</lang>

counts is a map from each value found in xs to the number of occurrences (foldr traverses the list, insertWith' increments the count). This map is then filtered to only those entries whose count is the maximum count, and their keys (the values from the input list) are returned.

> mode [1,2,3,3,2,1,1]
[1]
> mode [1,2,3,3,2,1]
[1,2,3]

Alternately: <lang haskell>import Data.List (group, sort)

mode xs = map fst $ filter ((==best).snd) counts

   where counts = map (\l -> (head l, length l)) . group . sort $ xs
         best = maximum (map snd counts)</lang>

J

mode=:  ~.  #~  ( = >./ )@( #/.~ )

Java

<lang java>import java.util.*;

public class Mode {

   public static <T> List<T> mode(List<? extends T> coll) {
       Map<T, Integer> seen = new HashMap<T, Integer>();
       int max = 0;
       List<T> maxElems = new ArrayList<T>();
       for (T value : coll) {
           if (seen.containsKey(value))
               seen.put(value, seen.get(value) + 1);
           else
               seen.put(value, 1);
           if (seen.get(value) > max) {
               max = seen.get(value);
               maxElems.clear();
               maxElems.add(value);
           } else if (seen.get(value) == max) {
               maxElems.add(value);
           }
       }
       return maxElems;
   }
   public static void main(String[] args) {
       System.out.println(mode(Arrays.asList(1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17))); // prints [6]
       System.out.println(mode(Arrays.asList(1, 1, 2, 4, 4))); // prints [1, 4]
   }

}</lang>

JavaScript

<lang javascript>function mode(ary) {

   var counter = {};
   var mode = [];
   var max = 0;
   for (var i in ary) {
       if (!(ary[i] in counter))
           counter[ary[i]] = 0;
       counter[ary[i]]++;
       if (counter[ary[i]] == max) 
           mode.push(ary[i]);
       else if (counter[ary[i]] > max) {
           max = counter[ary[i]];
           mode = [ary[i]];
       }
   }
   return mode; 

}

mode([1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17]); // [6] mode([1, 2, 4, 4, 1]); // [1,4]</lang>

Mathematica

Built-in function commonest returns a list of the most common element(s), even is there is only one 'commonest' number. Example for multiple 'commonest' numbers and a single 'commonest' number: <lang Mathematica>

Commonest[{b, a, c, 2, a, b, 1, 2, 3}]
Commonest[{1, 3, 2, 3}]

</lang> gives back: <lang Mathematica>

{b,a,2}
{3}

</lang>

MATLAB

<lang Matlab>function [modeValue] = findmode(setOfValues)

  modeValue = mode(setOfValues);</lang>

Objective-C

<lang objc>#import <Foundation/Foundation.h>

@interface NSArray (Mode) - (NSArray *)mode; @end

@implementation NSArray (Mode) - (NSArray *)mode {

   NSCountedSet *seen = [NSCountedSet setWithArray:self];
   int max = 0;
   NSMutableArray *maxElems = [NSMutableArray array];
   NSEnumerator *enm = [seen objectEnumerator];
   id obj;
   while( (obj = [enm nextObject]) ) {
       int count = [seen countForObject:obj];
       if (count > max) {
           max = count;
           [maxElems removeAllObjects];
           [maxElems addObject:obj];
       } else if (count == max) {
           [maxElems addObject:obj];
       }
   }
   return maxElems;

} @end</lang>

OCaml

<lang ocaml>let mode lst =

 let seen = Hashtbl.create 42 in
   List.iter (fun x ->
                let old = if Hashtbl.mem seen x then
                  Hashtbl.find seen x
                else 0 in
                  Hashtbl.replace seen x (old + 1))
     lst;
   let best = Hashtbl.fold (fun _ -> max) seen 0 in
     Hashtbl.fold (fun k v acc ->
                     if v = best then k :: acc
                     else acc)
       seen []</lang>
# mode [1;3;6;6;6;6;7;7;12;12;17];;
- : int list = [6]
# mode [1;1;2;4;4];;
- : int list = [4; 1]

Octave

Of course Octave has the mode function; but it returns only the "lowest" mode if multiple modes are available.

<lang octave>function m = mode2(v)

 sv = sort(v);
 % build two vectors, vals and c, so that
 % c(i) holds how many times vals(i) appears
 i = 1; c = []; vals = [];
 while (i <= numel(v) )
   tc = sum(sv==sv(i)); % it would be faster to count
                        % them "by hand", since sv is sorted...
   c = [c, tc];
   vals = [vals, sv(i)];
   i += tc;
 endwhile
 % stack vals and c building a 2-rows matrix x
 x = cat(1,vals,c);
 % sort the second row (frequencies) into t (most frequent
 % first) and take the "original indices" i ... 
 [t, i] = sort(x(2,:), "descend");
 % ... so that we can use them to sort columns according
 % to frequencies
 nv = x(1,i);
 % at last, collect into m (the result) all the values
 % having the same bigger frequency
 r = t(1); i = 1;
 m = [];
 while ( t(i) == r )
   m = [m, nv(i)];
   i++;
 endwhile

endfunction</lang>

<lang octave>a = [1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17]; mode2(a) mode(a)

a = [1, 1, 2, 4, 4]; mode2(a)  % returns 1 and 4 mode(a)  % returns 1 only</lang>

Perl

<lang perl>use strict; use List::Util qw(max);

sub mode {

   my %c;
   foreach my $e ( @_ ) {

$c{$e}++;

   }
   my $best = max(values %c);
   return grep { $c{$_} == $best } keys %c;

}</lang>

<lang perl>print "$_ " foreach mode(1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17); print "\n"; print "$_ " foreach mode(1, 1, 2, 4, 4); print "\n";</lang>

PHP

Note: this function only works with strings and integers, as those are the only things that can be used as keys of an (associative) array in PHP. <lang php><?php function mode($arr) {

   $count = array_count_values($arr);
   $best = max($count);
   return array_keys($count, $best);

}

print_r(mode(array(1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17))); print_r(mode(array(1, 1, 2, 4, 4))); ?></lang>

Python

The following solutions require that the elements be hashable.

Works with: Python version 2.5, 2.6 & 3.1

<lang python>>>> from collections import defaultdict >>> def modes(values): count = defaultdict(int) for v in values: count[v] +=1 best = max(count.values()) return [k for k,v in count.items() if v == best]

>>> modes([1,3,6,6,6,6,7,7,12,12,17]) [6] >>> modes((1,1,2,4,4)) [1, 4]</lang>

Works with: Python version 3.1

<lang python>>>> from collections import Counter >>> def modes(values): count = Counter(values) best = max(count.values()) return [k for k,v in count.items() if v == best]

>>> modes([1,3,6,6,6,6,7,7,12,12,17]) [6] >>> modes((1,1,2,4,4)) [1, 4]</lang>

If you just want one mode (instead of all of them), here's a one-liner for that: <lang python>def onemode(values):

   return max(set(values), key=values.count)</lang>

R

<lang R>statmode <- function(v) {

 a <- sort(table(v), decreasing=TRUE)
 r <- c()
 for(i in 1:length(a)) {
   if ( a1 == ai ) {
     r <- c(r, as.integer(names(a)[i]))
   } else break; # since it's sorted, once we find
                 # a different value, we can stop
 }
 r

}

print(statmode(c(1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17))) print(statmode(c(1, 1, 2, 4, 4)))</lang>

Ruby

Here's two methods, the first more Ruby-ish, the second perhaps a bit more efficient. <lang ruby>def mode(ary)

 seen = Hash.new(0)
 ary.each {|value| seen[value] += 1}
 max = seen.values.max
 seen.find_all {|key,value| value == max}.map {|key,value| key}

end

def mode_one_pass(ary)

 seen = Hash.new(0)
 max = 0
 max_elems = []
 ary.each do |value|
   seen[value] += 1
   if seen[value] > max
     max = seen[value]
     max_elems = [value]
   elsif seen[value] == max
     max_elems << value
   end
 end
 max_elems

end

p mode([1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17]) # => [6] p mode([1, 1, 2, 4, 4]) # => [1, 4] p mode_one_pass([1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17]) # => [6] p mode_one_pass([1, 1, 2, 4, 4]) # => [1, 4]</lang>

Works with: Ruby version 1.8.7

If you just want one mode (instead of all of them), here's a one-liner for that: <lang ruby>def one_mode(ary)

 ary.max_by { |x| ary.count(x) }

end</lang>

Smalltalk

Works with: GNU Smalltalk

This code is able to find the mode of any collection of any kind of object. <lang smalltalk>OrderedCollection extend [

 mode [ |s|
    s := self asBag sortedByCount.
    ^ (s select: [ :k | ((s at: 1) key) = (k key) ]) collect: [:k| k value]
 ]

].

  1. ( 1 3 6 6 6 6 7 7 12 12 17 ) asOrderedCollection
   mode displayNl.
  1. ( 1 1 2 4 4) asOrderedCollection
   mode displayNl.</lang>

Tcl

Works with: Tcl version 8.6

<lang tcl># Can find the modal value of any vector of values proc mode {n args} {

   foreach n [list $n {*}$args] {
       dict incr counter $n
   }
   set counts [lsort -stride 2 -index 1 -decreasing $counter]
   set best {}
   foreach {n count} $counts {
       if {[lindex $counts 1] == $count} {
           lappend best $n
       } else break
   }
   return $best

}

  1. Testing

puts [mode 1 3 6 6 6 6 7 7 12 12 17]; # --> 6 puts [mode 1 1 2 4 4]; # --> 1 4</lang> Note that this works for any kind of value.

Ursala

The mode function defined below works on lists of any type and returns a list of the modes. There is no concept of a general collection in Ursala. The algorithm is to partition the list by equality, then partition the classes by their lengths, and then select a representative from each member of the set of classes with the maximum length.

<lang Ursala>#import std

mode = ~&hS+ leql$^&h+ eql|=@K2

  1. cast %nLW

examples = mode~~ (<1,3,6,6,6,7,7,12,12,17>,<1,1,2,4,4>)</lang> The function is tested on a pair of lists, one with a unique mode and one with multiple modes. Here is the output.

(<6>,<4,1>)

Vedit macro language

Current edit buffer stores the collection. Each line is an item in the collection. The items can be any type (ascii text, numeric values in ascii, binary values). However, binary file with fixed record length would require some modifications to the code.

The "mode" item and it's count are displayed on status line. If there are multiple items with the same count, the smallest one is displayed. <lang vedit> BOF // Copy all data to a new buffer Reg_Copy(10, ALL) Buf_Switch(Buf_Free) Reg_Ins(10)

Sort(0, File_Size) // Sort the data

BOF repeat(ALL) { // Count & delete duplicate lines

   #1 = 1
   while(Match("^{.*}\N\1$", REGEXP)==0) {
       Del_Line(1)
       #1++
   }
   Num_Ins(#1, NOCR)             // Insert item count at the beginning of line
   Ins_Char(9)                   // TAB
   Line(1, ERRBREAK)             // Next (different) line

}

Sort(0, File_Size, REVERSE) // Sort according to the count

BOF // Display the results Reg_Copy_Block(10, CP, EOL_pos) Buf_Quit(OK) Statline_Message(@10) </lang>