Function frequency

From Rosetta Code
Revision as of 23:32, 3 September 2012 by rosettacode>Gerard Schildberger (→‎{{header|REXX}}: changed the choice of the name of the stemmed variable used for the counters. -- ~~~~)
Task
Function frequency
You are encouraged to solve this task according to the task description, using any language you may know.

Display - for a program or runtime environment (whatever suites the style of your language) - the top ten most frequently occurring functions (or also identifiers or tokens, if preferred).

This is a static analysis: The question is not how often each function is actually executed at runtime, but how often it is used by the programmer.

Besides its practical usefulness, the intent of this task is to show how to do self-inspection within the language.

ACL2

This will, unfortunately, also catch variable names after an open paren, such as in (let ...) expressions. <lang Lisp>(in-package "ACL2")

(set-state-ok t)

(defun read-all-objects (limit channel state)

  (mv-let (eof obj state)
          (read-object channel state)
     (if (or eof (zp limit))
         (mv nil state)
         (mv-let (so-far state)
                 (read-all-objects (- limit 1) channel state)
            (mv (cons obj so-far) state)))))

(defun list-starters (xs)

  (cond ((endp xs) nil)
        ((consp (first xs))
         (append (if (symbolp (first (first xs)))
                     (list (first (first xs)))
                     nil)
                 (list-starters (rest (first xs)))
                 (list-starters (rest xs))))
        (t (list-starters (rest xs)))))

(defun invoked-functions (filename state)

  (mv-let (channel state)
          (open-input-channel filename :object state)
     (mv-let (code state)
             (read-all-objects 1000 channel state)
        (mv (list-starters code) state))))

(defun increment-for (key alist)

  (cond ((endp alist) (list (cons key 1)))
        ((equal key (car (first alist)))
         (cons (cons key (1+ (cdr (first alist))))
               (rest alist)))
        (t (cons (first alist)
                 (increment-for key (rest alist))))))

(defun symbol-freq-table (symbols)

  (if (endp symbols)
      nil
      (increment-for (first symbols)
                     (symbol-freq-table (rest symbols)))))

(defun insert-freq-table (pair alist)

  (cond ((endp alist)
         (list pair))
        ((> (cdr pair) (cdr (first alist)))
         (cons pair alist))
        (t (cons (first alist)
                 (insert-freq-table pair (rest alist))))))

(defun isort-freq-table (alist)

  (if (endp alist)
      nil
      (insert-freq-table (first alist)
                         (isort-freq-table (rest alist)))))

(defun main (state)

  (mv-let (fns state)
          (invoked-functions "function-freq.lisp" state)
     (mv (take 10 (isort-freq-table
                   (symbol-freq-table fns))) state)))</lang>

Output (for itself):

(((FIRST . 10)
  (REST . 8)
  (DEFUN . 8)
  (CONS . 7)
  (MV-LET . 5)
  (LIST-STARTERS . 4)
  (IF . 4)
  (MV . 4)
  (COND . 3)
  (LIST . 3))
 <state>)

C

This program treats doesn't differentiate between macros and functions. It works by looking for function calls which are not inside strings or comments. If a function call has a C style comment between the opening brace and the name of the function, this program will not recognize it as a function call. <lang C>#define _POSIX_SOURCE

  1. include <ctype.h>
  2. include <stdio.h>
  3. include <stdlib.h>
  4. include <errno.h>
  5. include <string.h>
  6. include <stddef.h>
  7. include <sys/mman.h>
  8. include <sys/types.h>
  9. include <sys/stat.h>
  10. include <unistd.h>

struct functionInfo {

  char* name;
  int timesCalled;
  char marked;

}; void addToList(struct functionInfo** list, struct functionInfo toAdd, \

              size_t* numElements, size_t* allocatedSize) {
  static const char* keywords[32] = {"auto", "break", "case", "char", "const", \
                                     "continue", "default", "do", "double", \
                                     "else", "enum", "extern", "float", "for", \
                                     "goto", "if", "int", "long", "register", \
                                     "return", "short", "signed", "sizeof", \
                                     "static", "struct", "switch", "typedef", \
                                     "union", "unsigned", "void", "volatile", \
                                     "while"};
  /* If the "function" being called is actually a keyword, then ignore it */
  for (int i = 0;i < 32;i++) {
     if (!strcmp(toAdd.name, keywords[i])) {
        return;
     }
  }
  if (!*list) {
     *allocatedSize = 10;
     *list = calloc(*allocatedSize, sizeof(struct functionInfo));
     if (!*list) {
        printf("Failed to allocate %d elements of %d bytes each.\n", \
               *allocatedSize, sizeof(struct functionInfo));
        abort();
     }
     (*list)[0].name = malloc(strlen(toAdd.name)+1);
     if (!(*list)[0].name) {
        printf("Failed to allocate %d bytes.\n", strlen(toAdd.name)+1);
        abort();
     }
     strcpy((*list)[0].name, toAdd.name);
     (*list)[0].timesCalled = 1;
     (*list)[0].marked = 0;
     *numElements = 1;
  }
  else {
     char found = 0;
     for (unsigned int i = 0;i < *numElements;i++) {
        if (!strcmp((*list)[i].name, toAdd.name)) {
           found = 1;
           (*list)[i].timesCalled++;
           break;
        }
     }
     if (!found) {
        struct functionInfo* newList = calloc((*allocatedSize)+10, \
                                              sizeof(struct functionInfo));
        if (!newList) {
           printf("Failed to allocate %d elements of %d bytes each.\n", \
                  (*allocatedSize)+10, sizeof(struct functionInfo));
           abort();
        }
        memcpy(newList, *list, (*allocatedSize)*sizeof(struct functionInfo));
        free(*list);
        *allocatedSize += 10;
        *list = newList;
        (*list)[*numElements].name = malloc(strlen(toAdd.name)+1);
        if (!(*list)[*numElements].name) {
           printf("Failed to allocate %d bytes.\n", strlen(toAdd.name)+1);
           abort();
        }
        strcpy((*list)[*numElements].name, toAdd.name);
        (*list)[*numElements].timesCalled = 1;
        (*list)[*numElements].marked = 0;
        (*numElements)++;
     }
  }

} void printList(struct functionInfo** list, size_t numElements) {

  char maxSet = 0;
  size_t maxIndex = 0;
  for (unsigned int i = 0;i<10;i++) {
     maxSet = 0;
     for (size_t j = 0;j<numElements;j++) {
        if (!maxSet || (*list)[j].timesCalled > (*list)[maxIndex].timesCalled) {
           if (!(*list)[j].marked) {
              maxSet = 1;
              maxIndex = j;
           }
        }
     }
     (*list)[maxIndex].marked = 1;
     printf("%s() called %d times.\n", (*list)[maxIndex].name, \
            (*list)[maxIndex].timesCalled);
  }

} void freeList(struct functionInfo** list, size_t numElements) {

  for (size_t i = 0;i<numElements;i++) {
     free((*list)[i].name);
  }
  free(*list);

} char* extractFunctionName(char* readHead) {

  char* identifier = readHead;
  if (isalpha(*identifier) || *identifier == '_') {
     while (isalnum(*identifier) || *identifier == '_') {
        identifier++;
     }
  }
  /* Search forward for spaces and then an open parenthesis
   * but do not include this in the function name.
   */
  char* toParen = identifier;
  if (toParen == readHead) return NULL;
  while (isspace(*toParen)) {
     toParen++;
  }
  if (*toParen != '(') return NULL;
  /* Copy the found function name to the output string */
  ptrdiff_t size = (ptrdiff_t)((ptrdiff_t)identifier) \
                   - ((ptrdiff_t)readHead)+1;
  char* const name = malloc(size);
  if (!name) {
     printf("Failed to allocate %d bytes.\n", size);
     abort();
  }
  name[size-1] = '\0';
  memcpy(name, readHead, size-1);
  /* Function names can't be blank */
  if (strcmp(name, "")) {
     return name;
  }
  return NULL;

} int main(int argc, char** argv) {

  for (int i = 1;i<argc;i++) {
     errno = 0;
     FILE* file = fopen(argv[i], "r");
     if (errno || !file) {
        printf("fopen() failed with error code \"%s\"\n", \
               strerror(errno));
        abort();
     }
     char comment = 0;
     #define DOUBLEQUOTE 1
     #define SINGLEQUOTE 2
     int string = 0;
     struct functionInfo* functions = NULL;
     struct functionInfo toAdd;
     size_t numElements = 0;
     size_t allocatedSize = 0;
     struct stat metaData;
     errno = 0;
     if (fstat(fileno(file), &metaData) < 0) {
        printf("fstat() returned error \"%s\"\n", strerror(errno));
        abort();
     }
     char* mmappedSource = (char*)mmap(NULL, metaData.st_size, PROT_READ, \
                                       MAP_PRIVATE, fileno(file), 0);
     if (errno) {
        printf("mmap() failed with error \"%s\"\n", strerror(errno));
        abort();
     }
     if (!mmappedSource) {
        printf("mmap() returned NULL.\n");
        abort();
     }
     char* readHead = mmappedSource;
     while (readHead < mmappedSource + metaData.st_size) {
        while (*readHead) {
           /* Ignore comments inside strings */
           if (!string) {
              if (*readHead == '/' && !strncmp(readHead, "/*", 2)) {
                 comment = 1;
              }
              if (*readHead == '*' && !strncmp(readHead, "*/", 2)) {
                 comment = 0;
              }
           }
           /* Ignore strings inside comments */
           if (!comment) {
              if (*readHead == '"') {
                 if (!string) {
                    string = DOUBLEQUOTE;
                 }
                 else if (string == DOUBLEQUOTE) {
                    /* Only toggle string mode if the quote character
                     * is not escaped
                     */
                    if (strncmp((readHead-1), "\\\"", 2)) {
                       string = 0;
                    }
                 }
              }
              if (*readHead == '\) {
                 if (!string) {
                    string = SINGLEQUOTE;
                 }
                 else if (string == SINGLEQUOTE) {
                    if (strncmp((readHead-1), "\\\'", 2)) {
                       string = 0;
                    }
                 }
              }
           }
           /* Look for identifiers outside of any comment or string */
           if (!comment && !string) {
              char* name = extractFunctionName(readHead);
              /* Don't read part of an identifier on the next iteration */
              if (name) {
                 toAdd.name = name;
                 addToList(&functions, toAdd, &numElements, &allocatedSize);
                 readHead += strlen(name);
              }
              free(name);
           }
           readHead++;
        }
     }
     errno = 0;
     munmap(mmappedSource, metaData.st_size);
     if (errno) {
        printf("munmap() returned error \"%s\"\n", strerror(errno));
        abort();
     }
     errno = 0;
     fclose(file);
     if (errno) {
        printf("fclose() returned error \"%s\"\n", strerror(errno));
        abort();
     }
     printList(&functions, numElements);
     freeList(&functions, numElements);
  }
  return 0;

}</lang>

Go

Only crude approximation is currently easy in Go. The following parses source code, looks for function call syntax (an expression followed by an argument list) and prints the expression. <lang go>package main

import (

   "fmt"
   "go/ast"
   "go/parser"
   "go/token"
   "io/ioutil"
   "os"
   "sort"

)

func main() {

   if len(os.Args) != 2 {
       fmt.Println("usage ff <go source filename>")
       return
   }
   src, err := ioutil.ReadFile(os.Args[1])
   if err != nil {
       fmt.Println(err)
       return
   }
   fs := token.NewFileSet()
   a, err := parser.ParseFile(fs, os.Args[1], src, 0)
   if err != nil {
       fmt.Println(err)
       return
   }
   f := fs.File(a.Pos())
   m := make(map[string]int)
   ast.Inspect(a, func(n ast.Node) bool {
       if ce, ok := n.(*ast.CallExpr); ok {
           start := f.Offset(ce.Pos())
           end := f.Offset(ce.Lparen)
           m[string(src[start:end])]++
       }
       return true
   })
   cs := make(calls, 0, len(m))
   for k, v := range m {
       cs = append(cs, &call{k, v})
   }
   sort.Sort(cs)
   for i, c := range cs {
       fmt.Printf("%-20s %4d\n", c.expr, c.count)
       if i == 9 {
           break
       }
   }

}

type call struct {

   expr  string
   count int

} type calls []*call

func (c calls) Len() int { return len(c) } func (c calls) Swap(i, j int) { c[i], c[j] = c[j], c[i] } func (c calls) Less(i, j int) bool { return c[i].count > c[j].count }</lang> Output, when run on source code above:

len                     3
fmt.Println             3
f.Offset                2
make                    2
fmt.Printf              1
ioutil.ReadFile         1
a.Pos                   1
string                  1
token.NewFileSet        1
append                  1

PicoLisp

<lang PicoLisp>(let Freq NIL

  (for "L" (filter pair (extract getd (all)))
     (for "F"
        (filter atom
           (fish '((X) (or (circ? X) (getd X)))
              "L" ) )
        (accu 'Freq "F" 1) ) )
  (for X (head 10 (flip (by cdr sort Freq)))
     (tab (-7 4) (car X) (cdr X)) ) )</lang>

Output, for the system in debug mode plus the above code:

quote   310
car     236
cdr     181
setq    148
let     136
if      127
and     124
cons    110
cadr     80
or       76

If the condition in the 5th line (getd X) is replaced with (sym? X), then all symbols are counted, and the output is

X       566
quote   310
car     236
cdr     181
C       160
N       157
L       155
Lst     152
setq    148
T       144

And if it is replaced with (num? X), it is

1        71
0        38
2        27
3        17
7         9
-1        9
100       8
48        6
43        6
12        6

REXX

This version doesn't report on the top ten functions (subroutines), only the functions that are been counted (as implemented below).

There is no direct method of this type of counting behavior, but this technique allows a programmer to determine how many invocations there are for various functions/subroutines.

The use of the ?. stemmed variable is not specific and it can be any (seldom-used, or better yet, unused) REXX variable.
[A question mark was chosen because it most likely won't be used in most REXX programs.]
Also, the counter itself (the array index) should be unique (to avoid REXX variable name collisions). <lang rexx>/*REXX pgm counts frequency of various subroutine/function invocations. */ ?.=0 /*initialize all funky counters. */

            do j=1 to 10
            factorial   =      !(j)
            factorial_R =     !r(j)
            fibonacci   =    fib(j)
            fibonacci_R =   fibR(j)
            hofstadterQ =  hofsQ(j)
            width       = length(j) + length(length(j**j))
            end   /*j*/

say 'number of invocations for ! (factorial) = '  ?.! say 'number of invocations for ! recursive = '  ?.!r say 'number of invocations for Fibonacci = '  ?.fib say 'number of invocations for Fib recursive = '  ?.fibR say 'number of invocations for Hofstadter Q = '  ?.hofsQ say 'number of invocations for LENGTH = '  ?.length exit /*stick a fork in it, we're done.*/

/*─────────────────────────────────────! (factorial) subroutine─────────*/ !: procedure expose ?.;  ?.!=?.!+1; parse arg x;  !=1

                   do j=2  to x;     !=!*j;     end;             return !

/*─────────────────────────────────────!r (factorial) subroutine────────*/ !r: procedure expose ?.;  ?.!r=?.!r+1; parse arg x; if x<2 then return 1

   return x * !R(x-1)

/*──────────────────────────────────FIB subroutine (non─recursive)──────*/ fib: procedure expose ?.; ?.fib=?.fib+1; parse arg n; na=abs(n); a=0; b=1

    if na<2  then  return na          /*test for couple special cases. */
            do j=2  to na;   s=a+b;   a=b;   b=s;   end
    if n>0 | na//2==1  then return  s /*if positive or odd negative... */
                       else return -s /*return a negative Fib number.  */

/*──────────────────────────────────FIBR subroutine (recursive)─────────*/ fibR: procedure expose ?.; ?.fibR=?.fibr+1; parse arg n; na=abs(n); s=1

     if na<2  then  return  na        /*handle a couple special cases. */
     if n <0  then  if n//2==0  then s=-1
     return (fibR(na-1)+fibR(na-2))*s

/*──────────────────────────────────LENGTH subroutine───────────────────*/ length: procedure expose ?.;  ?.length=?.length+1

       return 'LENGTH'(arg(1))

/*──────────────────────────────────HOFSQ subroutine (recursive)─────────* hofsQ: procedure expose ?.;  ?.hofsq=?.hofsq+1; parse arg n

       if n<2  then return 1
       return  hofsQ(n - hofsQ(n - 1)) + hofsQ(n - hofsQ(n - 2))</lang>

output when using the input of: xxx

number of invocations for ! (factorial) =  10
number of invocations for !   recursive =  55
number of invocations for Fibonacci     =  10
number of invocations for Fib recursive =  452
number of invocations for Hofstadter Q  =  1922
number of invocations for LENGTH        =  30

Tcl

<lang tcl>package require Tcl 8.6

proc examine {filename} {

   global cmds
   set RE "(?:^|\[\[\{\])\[\\w:.\]+"
   set f [open $filename]
   while {[gets $f line] >= 0} {

set line [string trim $line] if {$line eq "" || [string match "#*" $line]} { continue } foreach cmd [regexp -all -inline $RE $line] { incr cmds([string trim $cmd "\{\["]) }

   }
   close $f

}

  1. Parse each file on the command line

foreach filename $argv {

   examine $filename

}

  1. Get the command list in order of frequency

set cmdinfo [lsort -stride 2 -index 1 -integer -decreasing [array get cmds]]

  1. Print the top 10 (two list items per entry, so 0-19, not 0-9)

foreach {cmd count} [lrange $cmdinfo 0 19] {

   puts [format "%-20s%d" $cmd $count]

}</lang> Sample run (note that the commands found are all standard Tcl commands; they're just commands so it is natural to expect them to be found):

bash$ tclsh8.6 RosettaCode/cmdfreq.tcl RosettaCode/*.tcl 
set                 2374
expr                846
if                  775
puts                558
return              553
proc                549
incr                485
foreach             432
lindex              406
lappend             351