Function frequency
You are encouraged to solve this task according to the task description, using any language you may know.
Display - for a program or runtime environment (whatever suits the style of your language) - the top ten most frequently occurring functions (or also identifiers or tokens, if preferred).
This is a static analysis: The question is not how often each function is actually executed at runtime, but how often it is used by the programmer.
Besides its practical usefulness, the intent of this task is to show how to do self-inspection within the language.
ACL2
This will, unfortunately, also catch variable names after an open paren, such as in (let ...)
expressions.
<lang Lisp>(in-package "ACL2")
(set-state-ok t)
(defun read-all-objects (limit channel state)
(mv-let (eof obj state) (read-object channel state) (if (or eof (zp limit)) (mv nil state) (mv-let (so-far state) (read-all-objects (- limit 1) channel state) (mv (cons obj so-far) state)))))
(defun list-starters (xs)
(cond ((endp xs) nil) ((consp (first xs)) (append (if (symbolp (first (first xs))) (list (first (first xs))) nil) (list-starters (rest (first xs))) (list-starters (rest xs)))) (t (list-starters (rest xs)))))
(defun invoked-functions (filename state)
(mv-let (channel state) (open-input-channel filename :object state) (mv-let (code state) (read-all-objects 1000 channel state) (mv (list-starters code) state))))
(defun increment-for (key alist)
(cond ((endp alist) (list (cons key 1))) ((equal key (car (first alist))) (cons (cons key (1+ (cdr (first alist)))) (rest alist))) (t (cons (first alist) (increment-for key (rest alist))))))
(defun symbol-freq-table (symbols)
(if (endp symbols) nil (increment-for (first symbols) (symbol-freq-table (rest symbols)))))
(defun insert-freq-table (pair alist)
(cond ((endp alist) (list pair)) ((> (cdr pair) (cdr (first alist))) (cons pair alist)) (t (cons (first alist) (insert-freq-table pair (rest alist))))))
(defun isort-freq-table (alist)
(if (endp alist) nil (insert-freq-table (first alist) (isort-freq-table (rest alist)))))
(defun main (state)
(mv-let (fns state) (invoked-functions "function-freq.lisp" state) (mv (take 10 (isort-freq-table (symbol-freq-table fns))) state)))</lang>
Output (for itself):
(((FIRST . 10) (REST . 8) (DEFUN . 8) (CONS . 7) (MV-LET . 5) (LIST-STARTERS . 4) (IF . 4) (MV . 4) (COND . 3) (LIST . 3)) <state>)
BBC BASIC
<lang bbcbasic> INSTALL @lib$+"SORTLIB"
Sort% = FN_sortinit(1,0) : REM Descending Valid$ = "0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz" DIM func$(1000), cnt%(1000) nFunc% = 0 file% = OPENIN("*.bbc") WHILE NOT EOF#file% ll% = BGET#file% no% = BGET#file% + 256*BGET#file% INPUT #file%, l$ i% = 1 REPEAT j% = INSTR(l$, CHR$&A4, i%) : REM Token for 'FN' k% = INSTR(l$, CHR$&F2, i%) : REM Token for 'PROC' IF k% IF j%=0 OR j%>k% THEN i% = k% f$ = "PROC" ELSE i% = j% f$ = "FN" ENDIF IF i% THEN REPEAT i% += 1 f$ += MID$(l$, i%, 1) UNTIL INSTR(Valid$, MID$(l$, i%+1, 1)) = 0 FOR j% = 0 TO nFunc%-1 IF f$ = func$(j%) EXIT FOR NEXT IF j% >= nFunc% nFunc% += 1 func$(j%) = f$ cnt%(j%) += 1 ENDIF UNTIL i%=0 ENDWHILE CLOSE #file% C% = nFunc% CALL Sort%, cnt%(0), func$(0) IF C% > 10 C% = 10 FOR i% = 0 TO C%-1 PRINT func$(i%) " (" ; cnt%(i%) ")" NEXT</lang>
Output (for file LBB.BBC):
FNcheck (291) FNexpr (125) FNtoken (49) PROCout (41) FNhandle (31) FNupper (30) FNitem (30) FNcheckns (21) FNinstrq (17) FNchild (17)
C
This program treats doesn't differentiate between macros and functions. It works by looking for function calls which are not inside strings or comments. If a function call has a C style comment between the opening brace and the name of the function, this program will not recognize it as a function call. <lang C>
- define _POSIX_SOURCE
- include <ctype.h>
- include <stdio.h>
- include <stdlib.h>
- include <errno.h>
- include <string.h>
- include <stddef.h>
- include <sys/mman.h>
- include <sys/types.h>
- include <sys/stat.h>
- include <unistd.h>
struct functionInfo {
char* name; int timesCalled; char marked;
}; void addToList(struct functionInfo** list, struct functionInfo toAdd, \
size_t* numElements, size_t* allocatedSize)
{
static const char* keywords[32] = {"auto", "break", "case", "char", "const", \ "continue", "default", "do", "double", \ "else", "enum", "extern", "float", "for", \ "goto", "if", "int", "long", "register", \ "return", "short", "signed", "sizeof", \ "static", "struct", "switch", "typedef", \ "union", "unsigned", "void", "volatile", \ "while" }; int i; /* If the "function" being called is actually a keyword, then ignore it */ for (i = 0; i < 32; i++) { if (!strcmp(toAdd.name, keywords[i])) { return; } } if (!*list) { *allocatedSize = 10; *list = calloc(*allocatedSize, sizeof(struct functionInfo)); if (!*list) { printf("Failed to allocate %lu elements of %lu bytes each.\n", \ *allocatedSize, sizeof(struct functionInfo)); abort(); } (*list)[0].name = malloc(strlen(toAdd.name)+1); if (!(*list)[0].name) { printf("Failed to allocate %lu bytes.\n", strlen(toAdd.name)+1); abort(); } strcpy((*list)[0].name, toAdd.name); (*list)[0].timesCalled = 1; (*list)[0].marked = 0; *numElements = 1; } else { char found = 0; unsigned int i; for (i = 0; i < *numElements; i++) { if (!strcmp((*list)[i].name, toAdd.name)) { found = 1; (*list)[i].timesCalled++; break; } } if (!found) { struct functionInfo* newList = calloc((*allocatedSize)+10, \ sizeof(struct functionInfo)); if (!newList) { printf("Failed to allocate %lu elements of %lu bytes each.\n", \ (*allocatedSize)+10, sizeof(struct functionInfo)); abort(); } memcpy(newList, *list, (*allocatedSize)*sizeof(struct functionInfo)); free(*list); *allocatedSize += 10; *list = newList; (*list)[*numElements].name = malloc(strlen(toAdd.name)+1); if (!(*list)[*numElements].name) { printf("Failed to allocate %lu bytes.\n", strlen(toAdd.name)+1); abort(); } strcpy((*list)[*numElements].name, toAdd.name); (*list)[*numElements].timesCalled = 1; (*list)[*numElements].marked = 0; (*numElements)++; } }
} void printList(struct functionInfo** list, size_t numElements) {
char maxSet = 0; unsigned int i; size_t maxIndex = 0; for (i = 0; i<10; i++) { maxSet = 0; size_t j; for (j = 0; j<numElements; j++) { if (!maxSet || (*list)[j].timesCalled > (*list)[maxIndex].timesCalled) { if (!(*list)[j].marked) { maxSet = 1; maxIndex = j; } } } (*list)[maxIndex].marked = 1; printf("%s() called %d times.\n", (*list)[maxIndex].name, \ (*list)[maxIndex].timesCalled); }
} void freeList(struct functionInfo** list, size_t numElements) {
size_t i; for (i = 0; i<numElements; i++) { free((*list)[i].name); } free(*list);
} char* extractFunctionName(char* readHead) {
char* identifier = readHead; if (isalpha(*identifier) || *identifier == '_') { while (isalnum(*identifier) || *identifier == '_') { identifier++; } } /* Search forward for spaces and then an open parenthesis * but do not include this in the function name. */ char* toParen = identifier; if (toParen == readHead) return NULL; while (isspace(*toParen)) { toParen++; } if (*toParen != '(') return NULL; /* Copy the found function name to the output string */ ptrdiff_t size = (ptrdiff_t)((ptrdiff_t)identifier) \ - ((ptrdiff_t)readHead)+1; char* const name = malloc(size); if (!name) { printf("Failed to allocate %lu bytes.\n", size); abort(); } name[size-1] = '\0'; memcpy(name, readHead, size-1); /* Function names can't be blank */ if (strcmp(name, "")) { return name; } free(name); return NULL;
} int main(int argc, char** argv) {
int i; for (i = 1; i<argc; i++) { errno = 0; FILE* file = fopen(argv[i], "r"); if (errno || !file) { printf("fopen() failed with error code \"%s\"\n", \ strerror(errno)); abort(); } char comment = 0;
- define DOUBLEQUOTE 1
- define SINGLEQUOTE 2
int string = 0; struct functionInfo* functions = NULL; struct functionInfo toAdd; size_t numElements = 0; size_t allocatedSize = 0; struct stat metaData; errno = 0; if (fstat(fileno(file), &metaData) < 0) { printf("fstat() returned error \"%s\"\n", strerror(errno)); abort(); } char* mmappedSource = (char*)mmap(NULL, metaData.st_size, PROT_READ, \ MAP_PRIVATE, fileno(file), 0); if (errno) { printf("mmap() failed with error \"%s\"\n", strerror(errno)); abort(); } if (!mmappedSource) { printf("mmap() returned NULL.\n"); abort(); } char* readHead = mmappedSource; while (readHead < mmappedSource + metaData.st_size) { while (*readHead) { /* Ignore comments inside strings */ if (!string) { if (*readHead == '/' && !strncmp(readHead, "/*", 2)) { comment = 1; } if (*readHead == '*' && !strncmp(readHead, "*/", 2)) { comment = 0; } } /* Ignore strings inside comments */ if (!comment) { if (*readHead == '"') { if (!string) { string = DOUBLEQUOTE; } else if (string == DOUBLEQUOTE) { /* Only toggle string mode if the quote character * is not escaped */ if (strncmp((readHead-1), "\\\"", 2)) { string = 0; } } } if (*readHead == '\) { if (!string) { string = SINGLEQUOTE; } else if (string == SINGLEQUOTE) { if (strncmp((readHead-1), "\\\'", 2)) { string = 0; } } } } /* Look for identifiers outside of any comment or string */ if (!comment && !string) { char* name = extractFunctionName(readHead); /* Don't read part of an identifier on the next iteration */ if (name) { toAdd.name = name; addToList(&functions, toAdd, &numElements, &allocatedSize); readHead += strlen(name); } free(name); } readHead++; } } errno = 0; munmap(mmappedSource, metaData.st_size); if (errno) { printf("munmap() returned error \"%s\"\n", strerror(errno)); abort(); } errno = 0; fclose(file); if (errno) { printf("fclose() returned error \"%s\"\n", strerror(errno)); abort(); } printList(&functions, numElements); freeList(&functions, numElements); } return 0;
}</lang>
Erlang
This is source code analyse. Mainly because I have never done that before. <lang Erlang> -module( function_frequency ).
-export( [erlang_source/1, task/0] ).
erlang_source( File ) ->
{ok, IO} = file:open( File, [read] ), Forms = parse_all( IO, io:parse_erl_form(IO, ), [] ), Functions = lists:flatten( [erl_syntax_lib:fold(fun accumulate_functions/2, [], X) || X <- Forms] ), dict:to_list( lists:foldl(fun count/2, dict:new(), Functions) ).
task() ->
Function_frequencies = erlang_source( "function_frequency.erl" ), {Top_tens, _Rest} = lists:split( 10, lists:reverse(lists:keysort(2, Function_frequencies)) ), [io:fwrite("Function ~p called ~p times.~n", [X, Y]) || {X, Y} <- Top_tens].
accumulate_functions( Tree, Acc ) -> accumulate_functions( erlang:element(1, Tree), Tree, Acc ).
accumulate_functions( call, Tree, Acc ) -> [accumulate_functions_name(Tree) | Acc]; accumulate_functions( _Other, _Tree, Acc ) -> Acc.
accumulate_functions_name( Tree ) -> accumulate_functions_name_scoop( erlang:element(3, Tree) ).
accumulate_functions_name_scoop( {atom, _Line, Name} ) -> Name; accumulate_functions_name_scoop( {remote, _Line, {atom, _Line, Module}, {atom, _Line, Name}} ) -> {Module, Name}.
count( Key, Dict ) -> dict:update_counter( Key, 1, Dict ).
parse_all( _IO, {eof, _End}, Acc ) -> Acc; parse_all( IO, {ok, Tokens, Location}, Acc ) -> parse_all( IO, io:parse_erl_form(IO, , Location), [Tokens | Acc] ). </lang>
- Output:
7> function_frequency:task(). Function parse_all called 2 times. Function {erlang,element} called 2 times. Function {io,parse_erl_form} called 2 times. Function {lists,flatten} called 1 times. Function {dict,update_counter} called 1 times. Function {lists,reverse} called 1 times. Function {lists,foldl} called 1 times. Function {io,fwrite} called 1 times. Function accumulate_functions_name called 1 times. Function erlang_source called 1 times.
Forth
Counts colon definitions, variables, constants, words defined by definers like CREATE..DOES>, etc. Check for levels of top from command line, by default 4. GForth 0.7.0 specific. <lang Forth>' noop is bootmessage
\ --- LIST OF CONSTANTS \ WORD# maximum word size \ RING# size of `Rings' element \ DEFS definitions \ KEYS \ \ --- LIST OF VARIABLES \ cmpl? is compiling? \ cword current compiled word
wordlist constant DEFS wordlist constant KEYS
\ --- Compiling 50 constant WORD#
- >>fPAD ( ca u -- ; u < 51 )
PAD 80 blank s" create " PAD swap MOVE s" 1 , DOES> 1 swap +! ;" PAD 57 + swap MOVE WORD# min PAD 7 + swap MOVE ;
- funcmpl ( ca u -- )
>>fPAD current @ DEFS current ! PAD 80 evaluate current ! ;
- >>kPAD ( ca u -- ; )
PAD 80 blank s" : " PAD swap MOVE s" parse-name funcmpl ;" PAD 59 + swap MOVE WORD# min PAD 2 + swap MOVE ;
- keycmpl ( ca u -- )
>>kPAD current @ KEYS current ! PAD 80 evaluate current ! ;
\ --- Interpreter
- intp BEGIN parse-name dup
WHILE ( ca u ) 2dup KEYS search-wordlist IF execute 2drop ELSE DEFS search-wordlist IF execute THEN THEN REPEAT 2drop ;
- run BEGIN refill WHILE intp REPEAT ;
\ --- Lists&Rings warnings OFF
- LIST ( node -- ) ]] BEGIN @ dup WHILE >R [[ ; immediate
warnings ON
- LOOP-LIST ( -- ) ]] R> REPEAT drop [[ ; immediate
- empty-ring? ( node -- f ) dup @ = ;
- RING ( node -- ) ]] dup BEGIN @ 2dup <> WHILE 2>R [[ ; immediate
- LOOP-RING ( -- ) ]] 2R> REPEAT 2drop [[ ; immediate
- new-node ( -- node )
here dup , ;
- do-link ( node new-node -- ; do link after current node )
over @ over ! swap ! ;
\ --- Sorting..
- nt>freq ( nt -- n ;frequency of uses )
name>int >BODY @ ;
- @maxfreq ( wid -- n ;maximum frequency )
0 swap cell+ LIST ( max ) I nt>freq 2dup < IF nip ELSE drop THEN LOOP-LIST ;
2 cells constant RING#
- rings-vec ( u -- a size ; create vector of rings )
here over 1+ 0 DO new-node drop 0 , LOOP swap RING# * ;
- populate-by ( a wid -- )
cell+ LIST dup I nt>freq RING# * + \ root-node new-node I , \ new-node do-link LOOP-LIST drop ;
\ --- Display TOP
- node>nt cell+ @ ;
- .ring ( root-node -- )
0 swap RING dup 0= IF I node>nt nt>freq . THEN space I node>nt name>string type 1+ LOOP-RING drop cr ;
- .top ( a size n -- )
-rot BOUNDS swap ?DO ( n ) I empty-ring? 0= IF 1- I .ring THEN dup 0= IF drop UNLOOP EXIT THEN [ RING# negate ] LITERAL +LOOP drop ;
- args>top# ( -- n )
1 arg 2dup 0 0 d<> IF >float IF f>d d>s dup 0= IF drop 4 THEN ELSE 4 THEN ELSE 2drop 4 THEN ;
\ --- KEYS behaviour
variable cmpl? cmpl? OFF 2variable cword
here WORD# allot 0 cword 2!
current @ KEYS current !
- create
cmpl? @ IF cword 2@ keycmpl ELSE parse-name funcmpl THEN ;
- constant
cmpl? @ IF cword 2@ keycmpl ELSE parse-name funcmpl THEN ;
- variable parse-name funcmpl ;
- value parse-name funcmpl ;
- defer parse-name funcmpl ;
- ( BEGIN >in @ [char] ) parse nip >in @ rot - =
WHILE refill 0= IF exit THEN REPEAT ;
- \ 10 parse 2drop ;
- \G 10 parse 2drop ;
- S" [char] " parse 2drop ;
- ." [char] " parse 2drop ;
- [']
parse-name DEFS search-wordlist IF execute THEN ;
- postpone
parse-name DEFS search-wordlist IF execute THEN ;
- ; cmpl? OFF ;
- : warnings OFF
parse-name cword 2@ drop WORD# rot umin dup >R MOVE cword 2@ drop R> cword 2! cword 2@ cmpl? @ IF keycmpl \ `:' inside def. = a defining word ELSE funcmpl THEN cmpl? ON warnings ON
current !
\ Run, ruuun! stdin ' run execute-parsing-file DEFS @maxfreq rings-vec over DEFS populate-by args>top# .top bye </lang> Self test:
$ $ gforth funfreq.fs 10 < funfreq.fs 7 DEFS funcmpl cmpl? 5 WORD# 4 KEYS keycmpl nt>freq RING# 3 LIST LOOP-LIST new-node node>nt 2 >>fPAD >>kPAD intp run empty-ring? RING LOOP-RING do-link @maxfreq rings-vec populate-by .ring .top args>top# 1 create constant variable value defer ( \ \G S" ." ['] postpone ; : $ $ gforth funfreq.fs < funfreq.fs 7 DEFS funcmpl cmpl? 5 WORD# 4 KEYS keycmpl nt>freq RING# 3 LIST LOOP-LIST new-node node>nt $
Go
Only crude approximation is currently easy in Go. The following parses source code, looks for function call syntax (an expression followed by an argument list) and prints the expression. <lang go>package main
import (
"fmt" "go/ast" "go/parser" "go/token" "io/ioutil" "os" "sort"
)
func main() {
if len(os.Args) != 2 { fmt.Println("usage ff <go source filename>") return } src, err := ioutil.ReadFile(os.Args[1]) if err != nil { fmt.Println(err) return } fs := token.NewFileSet() a, err := parser.ParseFile(fs, os.Args[1], src, 0) if err != nil { fmt.Println(err) return } f := fs.File(a.Pos()) m := make(map[string]int) ast.Inspect(a, func(n ast.Node) bool { if ce, ok := n.(*ast.CallExpr); ok { start := f.Offset(ce.Pos()) end := f.Offset(ce.Lparen) m[string(src[start:end])]++ } return true }) cs := make(calls, 0, len(m)) for k, v := range m { cs = append(cs, &call{k, v}) } sort.Sort(cs) for i, c := range cs { fmt.Printf("%-20s %4d\n", c.expr, c.count) if i == 9 { break } }
}
type call struct {
expr string count int
} type calls []*call
func (c calls) Len() int { return len(c) } func (c calls) Swap(i, j int) { c[i], c[j] = c[j], c[i] } func (c calls) Less(i, j int) bool { return c[i].count > c[j].count }</lang> Output, when run on source code above:
len 3 fmt.Println 3 f.Offset 2 make 2 fmt.Printf 1 ioutil.ReadFile 1 a.Pos 1 string 1 token.NewFileSet 1 append 1
J
The lowest approach taken here makes no effort to classify the primitives as monads nor dyads, nor as verbs, adverbs, nor conjunctions. Did "-" mean "additive inverse" or indicate subtraction? Does ";" raze or link? J is a multi-instruction single data language. Parentheses around a group of verbs form hooks or forks which affect data flow. The simple top10 verb does not find these important constructs.
<lang j>
PRIMITIVES=: ;:'! !. !: " ". ": # #. #: $ $. $: % %. %: & &. &.: &: * *. *: + +. +: , ,. ,: - -. -: . .. .: / /. /: 0: 1: 2: 3: 4: 5: 6: 7: 8: 9: : :. :: ; ;. ;: < <. <: = =. =: > >. >: ? ?. ...'
Filter=: (#~`)(`:6)
NB. monad top10 . y is a character vector of much j source code top10=: 10 {. \:~@:((#;{.)/.~@:(e.&PRIMITIVES Filter@:;:))
top10 JSOURCE NB. JSOURCE are the j Zeckendorf verbs.
┌─┬──┐ │6│=.│ ├─┼──┤ │5│=:│ ├─┼──┤ │4│@:│ ├─┼──┤ │3│~ │ ├─┼──┤ │3│: │ ├─┼──┤ │3│+ │ ├─┼──┤ │3│$ │ ├─┼──┤ │2│|.│ ├─┼──┤ │2│i.│ ├─┼──┤ │2│/ │ └─┴──┘ </lang>
LiveCode
Initially based on Listing all the handlers in a script <lang LiveCode>function handlerNames pScript
put pScript into pScriptCopy filter pScript with regex pattern "^(on|function).*" -- add in the built-in commands & functions put the commandNames & the functionnames into cmdfunc repeat for each line builtin in cmdfunc put 0 into handlers[builtin] end repeat -- add user defined handlers, remove this section of you do not want your own functions included repeat with x = 1 to the number of lines of pScript put word 2 of line x of pScript into handlername put 0 into handlers[handlername] end repeat -- count handlers used repeat with x = 1 to the number of lines of pScriptCopy repeat for each key k in handlers if k is among the tokens of line x of pScriptCopy then add 1 to handlers[k] end if end repeat end repeat combine handlers using cr and space sort lines of handlers descending by word 2 of each put line 1 to 10 of handlers into handlers return handlers
end handlerNames</lang>
To use<lang LiveCode>put handlerNames(the script of this stack & cr & the script of this card & cr & the script of me)</lang>
Sample output<lang LiveCode>if 8 put 8 return 8 function 7 factorialacc 4 -- user def function for other rosetta task factorialr 3 -- user def function for other rosetta task handlerNames 3 factorial 2 -- user def function for other rosetta task factorialit 2 -- user def function for other rosetta task mouseUp 2</lang>
Mathematica / Wolfram Language
<lang Mathematica>programCount[fn_] := Reverse[If[Length[#] > 10, Take[#, -10], #] &[SortBy[Tally[Cases[DownValues[fn], s_Symbol, \[Infinity], Heads -> True]], Last]]]</lang>
- Output:
The output of applying this program to itself...
programCount[programCount] {{Slot, 3}, {Pattern, 2}, {fn, 2}, {Blank, 2}, {\[Infinity], 1}, {True, 1}, {Tally, 1}, {Take, 1}, {Symbol, 1}, {SortBy, 1}}
Perl 6
Here we just examine the ast of the Perl 6 compiler (which is written in Perl 6) to look for function calls. <lang perl6>my $text = qqx[perl6 --target=ast @*ARGS[]]; my %fun; for $text.lines {
%fun{$0}++ if / '(call &' (.*?) ')' /
}
for %fun.invert.sort.reverse[^10] { .value.say }</lang>
- Output:
Here we run it on the strand sort RC entry. Note how Perl 6 considers various operators to really be function calls underneath.
$ ./morefun strand pop postcircumfix:<[ ]> unshift succeed splice prefix:<-> push infix:<,> infix:<..> infix:<->
PicoLisp
<lang PicoLisp>(let Freq NIL
(for "L" (filter pair (extract getd (all))) (for "F" (filter atom (fish '((X) (or (circ? X) (getd X))) "L" ) ) (accu 'Freq "F" 1) ) ) (for X (head 10 (flip (by cdr sort Freq))) (tab (-7 4) (car X) (cdr X)) ) )</lang>
Output, for the system in debug mode plus the above code:
quote 310 car 236 cdr 181 setq 148 let 136 if 127 and 124 cons 110 cadr 80 or 76
If the condition in the 5th line (getd X) is replaced with (sym? X), then all symbols are counted, and the output is
X 566 quote 310 car 236 cdr 181 C 160 N 157 L 155 Lst 152 setq 148 T 144
And if it is replaced with (num? X), it is
1 71 0 38 2 27 3 17 7 9 -1 9 100 8 48 6 43 6 12 6
Python
This code parses a Python source file using the built-in ast module and counts simple function calls; it won't process method calls or cases when you call the result of an expression. Also, since constructors are invoked by calling the class, constructor calls are counted as well. <lang python>import ast
class CallCountingVisitor(ast.NodeVisitor):
def __init__(self): self.calls = {}
def visit_Call(self, node): if isinstance(node.func, ast.Name): fun_name = node.func.id call_count = self.calls.get(fun_name, 0) self.calls[fun_name] = call_count + 1 self.generic_visit(node)
filename = input('Enter a filename to parse: ') with open(filename, encoding='utf-8') as f:
contents = f.read()
root = ast.parse(contents, filename=filename) #NOTE: this will throw a SyntaxError if the file isn't valid Python code visitor = CallCountingVisitor() visitor.visit(root) top10 = sorted(visitor.calls.items(), key=lambda x: x[1], reverse=True)[:10] for name, count in top10:
print(name,'called',count,'times')
</lang>
The result of running the program on the ftplib module of Python 3.2:
Enter a filename to parse: c:\Python32\Lib\ftplib.py error_reply called 10 times print called 10 times error_proto called 8 times callback called 8 times int called 7 times repr called 7 times isinstance called 6 times len called 6 times ValueError called 3 times parse257 called 2 times
Racket
<lang racket>
- lang racket
(require math) (define in (open-input-file "function-frequency.rkt")) (void (read-language in)) (define s-exprs (for/list ([s (in-port read in)]) s)) (define symbols (filter symbol? (flatten s-exprs))) (define counts (sort (hash->list (samples->hash symbols)) >= #:key cdr)) (take counts (min 10 (length counts))) </lang> Output: <lang racket> '((define . 4)
(counts . 3) (s-exprs . 2) (s . 2) (symbols . 2) (a-program . 2) (filter . 1) (hash->list . 1) (in-port . 1) (sort . 1))
</lang>
REXX
version 1
This version doesn't report on the top ten functions (or subroutines), only the functions that are been counted (as implemented below).
There is no direct method of this type of counting behavior, but this technique allows a programmer to determine how many invocations there are for various functions/subroutines.
The use of the ?. stemmed variable is not specific and it can be any (seldom-used, or better yet, unused) REXX variable.
[A question mark was chosen because it most likely won't be used in most REXX programs.]
Also, the counter itself (the array index) should be unique (to avoid REXX variable name collisions).
<lang rexx>/*REXX program counts frequency of various subroutine/function invocations. */
parse arg many . /*obtain optional argument from the CL.*/
if many== | many==',' then many=20 /*Not specified? Then use the default.*/
numeric digits 600 /*insure we can handle some big numbers*/
?.=0 /*initialize (et al) the freq counters.*/
do j=1 for many /* [↓] perform this loop MANY times.*/ factorial = !(j) factorial_R = !R(j) fibonacci = fib(j) fibonacci_R = fibR(j) HofstadterQ = hofsQ(j) breadth = length(j) + length(length(j**j)) end /*j*/ /*max # of invokes [↓] */
say 'number of invocations for ! (factorial) =' right(?.! , ?.w) say 'number of invocations for ! recursive =' right(?.!R , ?.w) say 'number of invocations for Fibonacci =' right(?.fib , ?.w) say 'number of invocations for Fib recursive =' right(?.fibR , ?.w) say 'number of invocations for Hofstadter Q =' right(?.hofsQ , ?.w) say 'number of invocations for length =' right(?.length , ?.w) exit /*stick a fork in it, we're all done. */ /*────────────────────────────────────────────────────────────────────────────*/ ?: arg _; ?._=?._+1; ?.w=max(?.w, 'LENGTH'(?._)); return /*────────────────────────────────────────────────────────────────────────────*/ !: procedure expose ?.; call ? !; !=1; do j=2 to arg(1); !=!*j; end; return ! /*────────────────────────────────────────────────────────────────────────────*/ !R: procedure expose ?.; call ? !R; arg x;if x<2 then return 1;return x*!R(x-1) /*────────────────────────────────────────────────────────────────────────────*/ fib: procedure expose ?.; call ? fib; parse arg n; na=abs(n); a=0; b=1
if na<2 then return na /*test for couple special cases.*/ do j=2 to na; s=a+b; a=b; b=s; end if n>0 | na//2==1 then return s /*positive or odd negative ···*/ else return -s /*return a negative Fib number. */
/*────────────────────────────────────────────────────────────────────────────*/ fibR: procedure expose ?.; call ? fibR; parse arg n; na=abs(n); s= 1
if na<2 then return na; if n<0 then if n//2==0 then s=-1 return (fibR(na-1) + fibR(na-2)) * s
/*────────────────────────────────────────────────────────────────────────────*/ hofsQ: procedure expose ?.; call ? hofsq; parse arg n; if n<2 then return 1
return hofsQ(n-hofsQ(n-1)) + hofsQ(n-hofsQ(n-2))
/*────────────────────────────────────────────────────────────────────────────*/ length: procedure expose ?.; call ? length; return 'LENGTH'(arg(1))</lang> output when using the default input:
number of invocations for ! (factorial) = 20 number of invocations for ! recursive = 210 number of invocations for Fibonacci = 20 number of invocations for Fib recursive = 57290 number of invocations for Hofstadter Q = 308696 number of invocations for length = 60
version 2
This program counts statically. It lacks, however, treatment of comments and literal strings. <lang rexx>fid='pgm.rex' cnt.=0 funl= Do While lines(fid)>0
l=linein(fid) Do Until p=0 p=pos('(',l) If p>0 Then Do do i=p-1 To 1 By -1 While is_tc(substr(l,i,1)) End fn=substr(l,i+1,p-i-1) If fn<> Then Call store fn l=substr(l,p+1) End End End
Do While funl<>
Parse Var funl fn funl Say right(cnt.fn,3) fn End
Exit x=a(3)+bbbbb(5,c(555)) special=date('S') 'DATE'() "date"() is_tc: abc='abcdefghijklmnopqrstuvwxyz' Return pos(arg(1),abc||translate(abc)'1234567890_"')>0
store: Parse Arg fun cnt.fun=cnt.fun+1 If cnt.fun=1 Then
funl=funl fun
Return</lang>
- Output:
1 lines 1 linein 2 pos 1 ' 1 is_tc 3 substr 1 right 1 a 1 bbbbb 1 c 1 date 1 'DATE' 1 "date" 1 arg 1 translate
Tcl
<lang tcl>package require Tcl 8.6
proc examine {filename} {
global cmds set RE "(?:^|\[\[\{\])\[\\w:.\]+" set f [open $filename] while {[gets $f line] >= 0} {
set line [string trim $line] if {$line eq "" || [string match "#*" $line]} { continue } foreach cmd [regexp -all -inline $RE $line] { incr cmds([string trim $cmd "\{\["]) }
} close $f
}
- Parse each file on the command line
foreach filename $argv {
examine $filename
}
- Get the command list in order of frequency
set cmdinfo [lsort -stride 2 -index 1 -integer -decreasing [array get cmds]]
- Print the top 10 (two list items per entry, so 0-19, not 0-9)
foreach {cmd count} [lrange $cmdinfo 0 19] {
puts [format "%-20s%d" $cmd $count]
}</lang> Sample run (note that the commands found are all standard Tcl commands; they're just commands so it is natural to expect them to be found):
bash$ tclsh8.6 RosettaCode/cmdfreq.tcl RosettaCode/*.tcl set 2374 expr 846 if 775 puts 558 return 553 proc 549 incr 485 foreach 432 lindex 406 lappend 351