Peter's Data Crunching Cheat Sheet

The design of advanced quantitative models and the implementation of complex data mining systems might be at the centre of my daily work. Even so, a considerable part of my time is actually spent crunching data: shuffling, sorting and scanning through data (which is sometimes no more than just eyeballing it) in order to understand and use it.

Greg Wilson from the University of Toronto wrote a great book on Data Crunching, where most of the basic stuff is thoroughly explained and presented in a concise manner. But most of his code examples are in either Java or Python. I really enjoy working with those two languages but every now and then I do need to use some of the other languages and tools common to our trade.

Therefore I've put together, as a reference for myself and others, a small collection of some basic data crunching routines in some common languages and tools:

Task Java Scala C# Python Perl Matlab R Julia SPSS SAS
writing to a file writeFile.java writeFile.scala writeFile.cs writeFile.py writeFile.pm writeCSV.M writeCSV.R writeFile.jl writeCSV.sps writeCSV.sas
reading from a file readFile.java readFile.scala readFile.cs readFile.py readFile.pm readCSV.M readCSV.R readFile.jl readCSV.sps readCSV.sas
containers containers.java containers.scala containers.cs containers.py containers.pm containers.M containers.R containers.jl containers.sps containers.sas
for loop forLoop.java forLoop.scala forLoop.cs forLoop.py forLoop.pm forLoop.M forLoop.R forLoop.jl forLoop.sps forLoop.sas
oo - concepts ooConcepts.java ooConcepts.scala ooConcepts.cs ooConcepts.py ooConcepts.pm ooConcepts.M ooConcepts.R ooConcepts.jl ooConcepts.sps ooConcepts.sas
database connection database.java database.scala database.cs database.py database.pm . database.R database.jl database.sps database.sas
random numbers random.java random.scala random.cs random.py random.pm random.M random.R random.jl random.sps random.sas
linear algebra matrix.java matrix.scala matrix.cs matrix.py matrix.pm matrix.M matrix.R matrix.jl matrix.sps matrix.sas
scatter plot scatterPlot.java scatterPlot.scala scatterPlot.cs scatterPlot.py . scatterPlot.M scatterPlot.R scatterPlot.jl scatterPlot.sps scatterPlot.sas
line chart lineChart.java lineChart.scala lineChart.cs lineChart.py . lineChart.M lineChart.R lineChart.jl lineChart.sps lineChart.sas
bar chart barChart.java barChart.scala barChart.cs barChart.py . barChart.M barChart.R barChart.jl barChart.sps barChart.sas
pie chart pieChart.java pieChart.scala pieChart.cs pieChart.py . pieChart.M pieChart.R . pieChart.sps pieChart.sas

Writing to a file

Java

//writeFile.java

try {
java.io.FileWriter writer = new java.io.FileWriter("C:/data/newFile.txt");
writer.write("line_1 \r\n");
writer.write("line_2 \r\n");
writer.flush();
writer.close();
} catch (java.io.IOException e) {
e.printStackTrace();
}

back to top

Scala

//writeFile.scala
//just using the java way of doing things
import java.io.{BufferedWriter, File, FileWriter}

val file = new File("C:/data/newFile.txt")
val bw = new BufferedWriter(new FileWriter(file))
try {
  bw.write("line 1 \n")
  bw.write("line 2 \n")
} finally {
  // always release the file handle, even if one of the writes fails
  bw.close()
}

back to top

C#

//writeFile.cs
System.IO.TextWriter writer = new System.IO.StreamWriter("C:/data/newFile.txt");
writer.WriteLine("line_1");
writer.WriteLine("line_2");
writer.Flush();
writer.Close();

back to top

Python

#writeFile.py

writer = open("C:/data/newFile.txt", "w")
writer.write("line_1 \n");
writer.write("line_2 \n");
writer.write("line_3 \n");
writer.close();

back to top

Perl

#writeFile.pm

open newFile, ">C:/data/newFile.txt";
print newFile "line 1 \n";
print newFile "line 2 \n";
print newFile "line 3 \n";
close newFile;

back to top

Ruby

#writeFile.rb
writer = File.new("C:/data/newFile.txt", "w")
writer.print("line_1 \n");
writer.print("line_2 \n");
writer.print("line_3 \n");
writer.close();

back to top

Matlab

%writeCSV.M
mat = [1.0 1.1 1.2; 1.3 1.4 1.5]
dlmwrite('./data/test.csv', mat)

back to top

R

#writeCSV.R
v1=c(1.0, 1.1, 1.2)
v2=c(1.3, 1.4, 1.5)
df = data.frame(col1=v1, col2=v2)
write.csv(df, "C:/data/newFile.csv")

back to top

Julia

#writeFile.jl
a = Int64[]
push!(a,1)
append!(a,[2,3])
b = ["a", "b", "c"]
c = Array[]
push!(c,a)
push!(c,b)

# Write every row of c as one comma-separated line.
# join() places the separators by position, so a row that contains its last
# value more than once (e.g. [1, 2, 1]) still gets every comma.
file = open("C:/data/test.csv", "w")
for zeile in c
    write(file, join(zeile, ","))
    write(file, "\r\n")
end

close(file)

back to top

SPSS

*writeCSV.sps;

DATA LIST
/one 1-5 (A) two 6-10.

BEGIN DATA
line1 0.01
line2 0.02
line3 0.03
END DATA.


SAVE TRANSLATE OUTFILE='C:\data\test.csv'
/TYPE=CSV
/CELLS=VALUES.

back to top

SAS

*writeCSV.sas;

data test;
input one $ two;
cards;
line1 0.01
line2 0.02
line3 0.03
line4 0.04
run;

proc export data=test outfile="C:\data\newFile.csv" dbms=csv replace;
run;

back to top

Reading from a file

Java

//readFile.java

try {
java.io.BufferedReader reader = new java.io.BufferedReader(new java.io.FileReader("C:/data/newFile.txt"));
String line = reader.readLine();

while(line != null) {
System.out.println(line);
line = reader.readLine();
}
reader.close();
} catch(java.io.IOException e) {
e.printStackTrace();
}

back to top

Scala

//readFile.scala
import scala.io.Source

val textfile = Source.fromFile("C:/data/newFile.txt")
try {
  // print every line of the file in upper case
  for (line <- textfile.getLines()) {
    println(line.toUpperCase)
  }
} finally {
  // close the underlying file even if reading fails
  textfile.close()
}

back to top

C#

//readFile.cs
using System.IO;

System.IO.TextReader reader = new System.IO.StreamReader("C:/data/newFile.txt");
String line = reader.ReadLine();
while(line != null) {
Console.WriteLine(line);
line = reader.ReadLine();
}
reader.Close();

back to top

Python

#readFile.py

reader = open("C:/data/newFile.txt", "r")
for line in reader:
print line.strip()
reader.close()

back to top

Perl

#readFile.pm

# open for reading; stop with the OS error message if that fails
open(newFile, "< C:/data/newFile.txt") or die "cannot open C:/data/newFile.txt: $!";
# <newFile> reads the next line into $_ and ends the loop at end of file
while (<newFile>) {
    print $_;
}
close newFile;

back to top

Ruby

#readFile.rb
reader = File.new("C:/data/newFile.txt", "r")
reader.each_line {|line| print line}
reader.close()

back to top

Matlab

%readCSV.M
mat = dlmread('./data/test.csv')

back to top

R

#readCSV.R
df<-read.csv("C:/data/newFile.csv")

back to top

Julia

file = open(readall, "C:/data/test.csv")

lines = split(file, "\n")
for line in lines
elements = split(line, ",")
for element in elements
print(element)
end
end

back to top

SPSS

*readCSV.sps;

GET DATA
/TYPE = TXT
/DELIMITERS = ","
/FILE = 'C:\data\test.csv'
/VARIABLES = one A5 two F5.2 .

DATASET NAME DataSet1 WINDOW=FRONT.

back to top

SAS

*readCSV.sas;
proc import datafile="C:\data\newFile.csv" out=test dbms=csv replace;
getnames=yes;
run;

back to top

Containers

Java

//containers.java

int[] array = new int[2];
array[0] = 1;

double[][] matrix = new double[2][2];
matrix[0][0] = 1.0;

java.util.ArrayList< String > arrayList = new java.util.ArrayList< String >();
arrayList.add("element_1");
String element = arrayList.get(0);

java.util.TreeMap< Integer, String > treeMap = new java.util.TreeMap< Integer, String >();
treeMap.put(0,"element_0");
Integer firstKey = treeMap.firstKey();
String value = treeMap.get(firstKey);

back to top

Scala

//containers.scala

// immutable List: every operation below returns a new list
val liste01 = List("red", "green", "blue", "yellow")
println(liste01)
println(liste01(0))
val liste02 = liste01 :+ "black"
println(liste02)
val liste03 = liste02.sorted
println(liste03)
val liste04 = liste03.map(_.toUpperCase)
println(liste04)

// immutable Map from the German colour name (key) to the English one (value)
val mappe = Map("blau" -> "blue", "rot" -> "red", "grün" -> "green")
for ((k, v) <- mappe) {
  println(s"ger: $k eng: $v")
}

back to top

C#

//containers.cs
//needs: using System; using System.Collections.Generic; using System.Linq;

// fixed-size one-dimensional array
int[] array = new int[2];
array[0] = 1;

// rectangular two-dimensional array
double[,] matrix = new double[2,2];
matrix[0,0] = 1.0;

// growable, typed list
List< String > list = new List< String >();
list.Add("element_1");
String element = list[0];

// key/value lookup; First() is the LINQ extension method
Dictionary< int, String > dictionary = new Dictionary< int, String >();
dictionary[0] = "element_0";
int firstKey = dictionary.Keys.First();
String value = dictionary[firstKey];

back to top

Python

#containers.py
#see random.py for numpy arrays

list = []
list.append("element")
element = list[0]
print(element)

dictionary = {}
dictionary["key_1"] = "element_1"
# next(iter(...)) yields a key on both Python 2 and Python 3,
# whereas keys()[0] only works on Python 2
key1 = next(iter(dictionary))
element1 = dictionary[key1]
print(element1)

back to top

Perl

#containers.pm
@array = ("a", "b", 3, 4);

foreach (@array) {
print "$_\n";
}

print "$array[2]\n";



%hash = ("key1" => "element1", "key2" => 2);

foreach $key (sort keys %hash) {
$value = $hash{$key};
print "$key => $value\n";
}

back to top

Ruby

#containers.rb
array = ["a", "b", 3, 4];
array.each { | value | puts "value: #{value} \n" };

hash = {"key1" => "element1", "key2" => 2};
hash.each_pair {|key, value| puts "key: #{key} value: #{value} \n"};

back to top

Matlab

%containers.M
str = 'hello world'
value = 3.1416
matrix = [1.0 1.1 1.2; 1.3 1.4 1.5]
struc.key1 = str
struc.key2 = value
struc.key3 = matrix

back to top

R

#containers.R
numericVector <- c(1.0, 2.0, 3, 4)

alphaNumericVector <- c("one", "two", "three", "two")

multiDimArray <- array(numericVector, c(2,2,3,3))

aMatrix <- matrix(numericVector, c(2,2))

aFactor <- factor(alphaNumericVector)

aList <- list(first=numericVector, second=alphaNumericVector, third=aMatrix, fourth="fourth", fifth=5)

aDataFrame <- data.frame(a=numericVector[0:2], b=alphaNumericVector, c=aFactor)

back to top

Julia

#containers.jl
a = String[]
push!(a, "a")

b = Float64[]
push!(b, 2.5)

c = Dict{Int64, String}()
c[1] = "a"

back to top

SPSS

*containers.sps;

NEW FILE.

INPUT PROGRAM.

VECTOR vec(3).
COMPUTE vec(1) = 1.
COMPUTE vec(2) = 2.
COMPUTE vec(3) = 3.

END FILE.

END INPUT PROGRAM.

EXECUTE.

LIST.

back to top

SAS

*containers.sas;

data test;
input one $ two;
cards;
line1 0.01
line2 0.02
line3 0.03
line4 0.04
run;

data test1;
set test;
array a two;
do count = 1 to dim(a);
three = 3 * a{count};
end;
drop count;
run;

back to top

For loop

Java

//forLoop.java
for(int i=0; i<10; i++) {
System.out.println("line_"+i);
}


java.util.ArrayList< String > arrayList = new java.util.ArrayList< String >();
arrayList.add("element_1");
arrayList.add("element_2");

for(String element : arrayList) {
System.out.println(element);
}

back to top

Scala

//forLoop.scala
// walk through 1..10 and report the parity of each number
(1 to 10).foreach { i =>
  val parity = if (i % 2 == 0) "even" else "odd"
  println(s"$i is an $parity number")
}

back to top

C#

//forLoop.cs
// counting loop
for (int i = 0; i < 10; i++)
{
    Console.WriteLine("line_" + i);
}

List< String > list = new List< String >();
list.Add("element_1");
list.Add("element_2");

// iterate over the elements of a collection
foreach (String element in list)
{
    Console.WriteLine(element);
}

back to top

Python

#forLoop.py

list = []
list.append("element_1")
list.append("element_2")

for i in range(len(list)):
print "position "+str(i)+": "+list[i]

for element in list:
print element

back to top

Perl

#forLoop.pm

foreach (0...4) {
print "numer is: $_\n";
}

back to top

Ruby

#forLoop.rb
array = ["a", "b", 3, 4];
array.each {|element| puts element}

back to top

Matlab

%forLoop.M
for i = 0:10
i*2
end

back to top

R

#forLoop.R
for(i in seq(10)) {
print(i)
}

back to top

Julia

#forLoop.jl
a = [1,2,3,4,8]
for element in a
print("$element \n")
end

back to top

SPSS

*forLoop.sps;

NEW FILE.

INPUT PROGRAM.

LOOP i = 1 to 10.
COMPUTE x = x + RV.NORMAL(0,1).
LEAVE i.
LEAVE x.
END CASE.
END LOOP.

END FILE.

END INPUT PROGRAM.

EXECUTE.

DATASET NAME werte.

SORT CASES x.

SAVE OUTFILE = "C:\data\test.sav".

back to top

SAS

*forLoop.sas;

data test;
do i=0 to 10;
output;
end;
run;

data test1;
set test;
array a i;
do count = 1 to dim(a);
b = 3*a{count};
end;
drop count;
run;

back to top

Object-oriented concepts

Java

//ooConcepts.java
public class ClassOne {
public void methodOne() {
System.out.println("I am method one from class one");
}
}

public interface InterfaceOne {
public void methodTwo();
}

public class ClassTwo extends ClassOne implements InterfaceOne {
public void methodOne() {
System.out.println("I am method one from class two");
}

public void methodTwo() {
System.out.println("I am method two from class two");
}
}

ClassOne one = new ClassOne();
one.methodOne();

ClassTwo two = new ClassTwo();
two.methodOne();
two.methodTwo();

back to top

Scala

//ooConcepts.scala
// Scala, being a functional language, takes
// types and traits much more seriously than classes
object scalaClass {
  /** Prints a fixed message to standard output. */
  def scalaMethod(): Unit = {
    println("I am a method")
  }

  /** Program entry point; the command-line arguments are not used. */
  def main(args: Array[String]): Unit = {
    scalaMethod()
  }
}

back to top

C#

//ooConcepts.cs
public class ClassOne
{
    // virtual so that a subclass can replace the implementation
    public virtual void methodOne()
    {
        Console.WriteLine("I am method one from class one");
    }
}

interface InterfaceOne
{
    void methodTwo();
}

public class ClassTwo : ClassOne, InterfaceOne
{
    // override instead of silently hiding the base method (compiler warning CS0108)
    public override void methodOne()
    {
        Console.WriteLine("I am method one from class two");
    }

    // implements InterfaceOne.methodTwo
    public void methodTwo()
    {
        Console.WriteLine("I am method two from class two");
    }
}


ClassOne one = new ClassOne();
one.methodOne();

ClassTwo two = new ClassTwo();
two.methodOne();
two.methodTwo();

back to top

Python

#ooConcepts.py

class classOne:
    """Base class with a single printing method."""

    def methodOne(self):
        """Print a fixed message identifying class one."""
        print("I am method one from class one")


class classTwo(classOne):
    """Subclass of classOne; inherits methodOne and adds methodTwo."""

    def methodTwo(self, comment):
        """Print a fixed message followed by ``comment``."""
        print("I am method two from class two")
        print(comment)


c2 = classTwo()
c2.methodOne()
c2.methodTwo("I am a comment")

back to top

Perl

#ooConcepts.pm

# Returns the product of its two arguments.
sub mult {
    # unpack the argument list in one step; the lexicals are deliberately
    # not called $a/$b, which are the variables sort() uses
    my ($x, $y) = @_;
    return $x * $y;
}

$one = 4;
$two = 5;
$three = mult($one, $two);
print "value: $three";

back to top

Ruby

#ooConcepts.rb

class ClassOne
def methodOne
puts "I am method one from class one";
end
end;


class ClassTwo < ClassOne
def methodTwo(comment)
@localComment = comment;
puts "I am method two from class two";
puts @localComment
end
end;


c1 = ClassOne.new;
c1.methodOne;

c2 = ClassTwo.new;
c2.methodOne;
c2.methodTwo("I am a comment");

back to top

Matlab

%ooConcepts.M
function r = ooConcepts(a, b)
r = a + 2*b

result = ooConcepts(4,5)

back to top

R

#ooConcepts.R
#run a script: source("scriptName")
#clear workspace: rm(list=ls())

sum <- function(vector) {
s <- 0
for(i in seq(along = vector)) {
s <- s + vector[i]
}
return(s)
}
a<-sum(c(1,2,3,4))
print(a)

back to top

Julia

#ooConcepts.jl
#to run a script:
#julia> include("C:/script.jl")

#simple function

function f1(a, b)
c = 0
for i in 1:a
c += b
end
c
end

print(f1(10,3))
print("\n")


#first class function

function createPower(x)
power = function (y)
return y^x
end
return power
end

square = createPower(2)
print(string("4^2: ", square(4), "\n"))
cube = createPower(3)
print(string("4^3: ", cube(4)), "\n")


#type

abstract Bicycle

type Roadbike <: Bicycle
number_of_gears::Int64
end

myBike = Roadbike(24)

back to top

SPSS

*ooConcepts.sps;

DEFINE !macro1(a=!CHAREND(',') /b=!CMDEND).
COMPUTE var1 = !a + !b.
END CASE.
!ENDDEFINE.

DEFINE !macro2(a=!TOKENS(1)).

!DO !i = 1 !TO !a.
COMPUTE var2 = !i+5.
END CASE.
!DOEND.


!ENDDEFINE.


NEW FILE.
INPUT PROGRAM.


!macro1 a=4,b=5.
!macro2 a=4.

END FILE.
END INPUT PROGRAM.
EXECUTE.

back to top

SAS

*ooConcepts.sas;

%macro mult(a, b);
data results;
c = &a * &b;
run;
%mend;


%macro loop(a);
%do i = 1 %to &a;
%put &i;
%end;
%mend;


data test;
%mult(4,5);
%loop(3);
run;

back to top

Database connection

Java

//database.java
//using SQLiteJDBC

try {
Class.forName("org.sqlite.JDBC");
} catch (ClassNotFoundException e) {
e.printStackTrace();
}

java.sql.Connection conn = null;
try {
    conn = java.sql.DriverManager.getConnection("jdbc:sqlite:test.db");
    java.sql.Statement stat = conn.createStatement();
    stat.executeUpdate("drop table if exists people;");
    stat.executeUpdate("create table people (name, year_of_birth);");

    // queue three parameterised inserts as one batch
    java.sql.PreparedStatement prepStat = conn.prepareStatement("insert into people values (?,?);");
    prepStat.setString(1,"Goethe");
    prepStat.setInt(2, 1749);
    prepStat.addBatch();
    prepStat.setString(1,"Schiller");
    prepStat.setInt(2, 1759);
    prepStat.addBatch();
    prepStat.setString(1,"Napoleon");
    prepStat.setInt(2, 1769);
    prepStat.addBatch();

    // run the batch with auto-commit switched off, then switch it back on
    conn.setAutoCommit(false);
    prepStat.executeBatch();
    conn.setAutoCommit(true);

    java.sql.ResultSet rs = stat.executeQuery("select * from people;");
    while (rs.next()) {
        System.out.println("name = " + rs.getString("name"));
        System.out.println("year of birth = " + rs.getInt("year_of_birth"));
    }
    rs.close();
} catch (java.sql.SQLException e) {
    e.printStackTrace();
} finally {
    // close the connection on the error path as well
    if (conn != null) {
        try {
            conn.close();
        } catch (java.sql.SQLException e) {
            e.printStackTrace();
        }
    }
}

back to top

Scala

//database.scala
//SLICK provides a functional interface for SQL
// but I am actually quite happy with JDBC

back to top

C#

//database.cs
//using MS-ACCESS

private System.Data.DataTable doQuery(String dbFileName, String sql)
{
System.Data.Common.DbProviderFactory factory = System.Data.Common.DbProviderFactories.GetFactory("System.Data.OleDb");

System.Data.Common.DbConnection connection = factory.CreateConnection();
connection.ConnectionString = getMSAccessConnectionString(dbFileName);

System.Data.Common.DbCommand command = factory.CreateCommand();
command.CommandText = sql;
command.Connection = connection;

System.Data.Common.DbDataAdapter adapter = factory.CreateDataAdapter();
adapter.SelectCommand = command;
System.Data.DataSet dataset = new System.Data.DataSet();
adapter.Fill(dataset);

return dataset.Tables[0];
}


private void doUpdate(String dbFileName, String sql)
{
System.Data.Common.DbProviderFactory factory = System.Data.Common.DbProviderFactories.GetFactory("System.Data.OleDb");

System.Data.Common.DbConnection connection = factory.CreateConnection();
connection.ConnectionString = getMSAccessConnectionString(dbFileName);

System.Data.Common.DbCommand command = factory.CreateCommand();
command.CommandText = sql;
command.Connection = connection;

connection.Open();
command.ExecuteNonQuery();
connection.Close();
}


private String getMSAccessConnectionString(String fileName)
{
String connectionString = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + fileName + ";Jet OLEDB:Engine Type=5";
return connectionString;
}



String sqlCreate = "CREATE TABLE people (name text, year_of_birth int)";
doUpdate("C:/data/test.mdb", sqlCreate);
String sqlGoethe = "INSERT INTO people (name, year_of_birth) VALUES ('Goethe', 1749)";
doUpdate("C:/data/test.mdb", sqlGoethe);
String sqlSchiller = "INSERT INTO people (name, year_of_birth) VALUES ('Schiller', 1759)";
doUpdate("C:/data/test.mdb", sqlSchiller);
String sqlNapoleon = "INSERT INTO people (name, year_of_birth) VALUES ('Napoleon', 1769)";
doUpdate("C:/data/test.mdb", sqlNapoleon);

String sqlQuery = "SELECT * FROM people";
System.Data.DataTable table = doQuery("C:/data/test.mdb", sqlQuery);
foreach (System.Data.DataRow row in table.Rows)
{
String name = (String)row[0];
int yearOfBirth = (int)row[1];
Console.WriteLine("name: " + name + " year of birth: " + yearOfBirth);
}

back to top

Python

#database.py
#using pysqlite

from pysqlite2 import dbapi2 as sqlite

con = sqlite.connect("mydb")
cursor = con.cursor()
cursor.execute("create table people (name varchar(20),year_of_birth integer);")
cursor.execute("insert into people (name, year_of_birth) values ('Goethe', 1749);")
cursor.execute("insert into people (name, year_of_birth) values ('Schiller', 1759);")
cursor.execute("insert into people (name, year_of_birth) values ('Napoleon', 1769);")
con.commit()

SELECT = "select * from people"
cursor.execute(SELECT)
print cursor.fetchall()

cursor.execute(SELECT)
for row in cursor:
print "name: ", row[0], " year of birth: ", row[1]

back to top

Perl

#database.pm
#using DBD_SQLite

use DBI;
$sql1 = "drop table if exists people";
$sql2 = "create table people (name, year_of_birth)";
$sql3 = "insert into people values ('Goethe', 1749)";
$sql4 = "insert into people values ('Schiller', 1759)";
$sql5 = "insert into people values ('Napoleon', 1769)";
@sqls = ($sql1, $sql2, $sql3, $sql4, $sql5);

$dbh = DBI->connect( "dbi:SQLite:dbname=TEST_DB", "", "" );

foreach(@sqls) {
$sth_in = $dbh->prepare($_);
$sth_in->execute();
} $sth_in-> finish();


$sql_read = "select name, year_of_birth from people";

$sth_out = $dbh->prepare($sql_read);
$sth_out->execute();
$sth_out->bind_columns( \$name, \$year_of_birth );

while ( $sth_out->fetch() ) {
print "name $name, year $year_of_birth \n";
}

$sth_out->finish();

$dbh->disconnect();

back to top

R

#database.R
# names and years are paired by position: Goethe 1749, Schiller 1759, Napoleon 1769
names <- c("Goethe", "Schiller", "Napoleon")
years <- c(1749, 1759, 1769)
f1 <- data.frame(names=names, years=years)
library("RSQLite")
con <- dbConnect(dbDriver("SQLite"), dbname="mydb")
#dbSendQuery(con, "drop table years_of_birth")
# store the data frame as a table
dbWriteTable(con, "years_of_birth", f1)

t <- dbListTables(con)
# read the table back into a second data frame
rs <- dbSendQuery(con, "select * from years_of_birth")
f2 <- fetch(rs)
dbClearResult(rs)
dbDisconnect(con)

back to top

Julia

#database.jl
using ODBC

ODBC.connect("SPSSDATA", usr="db2admin", pwd="db2admin")

dataFrame = query("select * from BETRUG")

disconnect()

back to top

SPSS

*database.sps;
* Read three columns from an ODBC data source.
* The connect string is a list of key=value pairs separated by semicolons.
GET DATA
/TYPE=ODBC
/CONNECT='DSN=SPSSDATA;UID=db2admin;PWD=db2admin;DBALIAS=SPSSDATA;'
/SQL='SELECT "X1", "X2", "X3" FROM "DB2ADMIN"."CLUSTERING"'
/ASSUMEDSTRWIDTH=255
/UNENCRYPTED.

CACHE.
EXECUTE.
DATASET NAME DataSet1 WINDOW=FRONT.

back to top

SAS

*database.sas;
*using sqlite and sqliteodbc.exe;
*datasource=sqliteDataSource;
*database=c:/data/sqliteDataBase.db3;

libname datBase odbc dsn=sqliteDataSource;

proc sql;
create table datBase.people (name varchar(20),year_of_birth integer);
quit;

proc sql;
insert into datBase.people (name, year_of_birth) values ('Goethe', 1749);
quit;

proc sql;
insert into datBase.people (name, year_of_birth) values ('Schiller', 1759);
quit;

proc sql;
insert into datBase.people (name, year_of_birth) values ('Napoleon', 1769);
quit;

proc sql;
create table test as select * from datBase.people;
quit;

back to top

Random numbers

Java

//random.java
int n = 10;
int k = 3;
double[][] x = new double[n][k];
java.util.Random rand = new java.util.Random();
for (int i = 0; i < n; i++) {
for (int j = 0; j < k; j++) {
x[i][j] = rand.nextGaussian();
}
}

back to top

Scala

//random.scala
//using Breeze
import breeze.linalg._
val norm = breeze.stats.distributions.Gaussian(0,1)
val werte = norm.sample(20)
print(werte)

back to top

C#

//random.cs
int n = 10;
int k = 3;
double[,] x = new double[n,k];
Random rand = new Random();
for (int i = 0; i < n; i++) {
for (int j = 0; j < k; j++) {
x[i,j] = rand.NextDouble();
}
}

back to top

Python

#random.py
import numpy

mu = 0
sig = 1
n = 10

x = numpy.random.normal(mu, sig, n)
y = numpy.ones(n) + 0.5 * x + numpy.random.normal(0, 1, n)
print y

back to top

Perl

#random.pm
use Math::Random;

$mu = 0;
$sig = 1;
$value = random_normal($mu, $sig);
print "value $value";

back to top

Ruby

#random.rb
n = 8
k = 3
# n rows, each holding k values drawn with rand()
x = Array.new(n) { Array.new(k) { rand() } }

back to top

Matlab

%random.M
% parameters of the normal distribution
mu = 0
sig = 1
% size of the result: n rows, k columns
n = 10
k = 1

vals = random('norm', mu, sig, n, k)

back to top

R

#random.R
n<- 10
x <- 100 * rnorm(n)
u <- rnorm(n)
y <- rep(0,n)
for(i in 1:n)
y[i] = 1 + 0.5*x[i] + u[i]

back to top

Julia

#random.jl
using Distributions

logNorm = LogNormal()

values = rand(logNorm, 100)

skewness(values)
kurtosis(values)

back to top

SPSS

*random.sps;

NEW FILE.

INPUT PROGRAM.

LOOP i = 1 to 10.
COMPUTE x1 = RV.NORMAL(0,1).
COMPUTE x2 = RV.UNIFORM(0.0, 1.0).
COMPUTE x3 = RV.POISSON(0.5).
END CASE.
END LOOP.

END FILE.

END INPUT PROGRAM.

EXECUTE.

DATASET NAME werte.

SAVE OUTFILE = "C:\data\test.sav".

back to top

SAS

*random.sas;
data test;
do i=0 to 500;
x = normal(-1);
x1 = rannor(123);
y = uniform(-1);
y1 = ranuni(123);
output;
end;
run;

back to top

Linear Algebra

Java

//matrix.java
//using JAMA: Java Matrix Package

int n = 10;
int k = 3;
double[][] x = new double[n][k];
java.util.Random rand = new java.util.Random();
for (int i = 0; i < n; i++) {
for (int j = 0; j < k; j++) {
x[i][j] = rand.nextGaussian();
}
}

Jama.Matrix X = new Jama.Matrix(x);
Jama.Matrix XtX = X.transpose().times(X);
Jama.Matrix XtXi = XtX.inverse();
for(int i=0; i< XtXi.getRowDimension(); i++) {
for(int j=0; j< XtXi.getColumnDimension(); j++) {
System.out.print(" "+XtXi.get(i,j));
}
System.out.println("");
}

back to top

Scala

//matrix.scala
// using Breeze

import breeze.linalg._

// 4 observations of 3 regressors: with more rows than columns,
// x.t * x is a 3x3 matrix that can be inverted in general
// (a 3x4 x would give a rank-deficient 4x4 product)
val x = DenseMatrix.rand(4, 3)
val y = DenseVector.ones[Double](4)

val xtx = x.t * x

val xtxi = inv(xtx)

// least-squares coefficients: (X'X)^-1 X'y
val beta = xtxi * (x.t * y)

println("beta: "+beta)

back to top

C#

//matrix.cs
// using dnAnalytics
// NOTE(review): n, k and the jagged array y are not defined in this snippet;
// presumably y is a double[n][k] built as in random.java - confirm
dnAnalytics.LinearAlgebra.Matrix mX = dnAnalytics.LinearAlgebra.MatrixBuilder.CreateMatrix(n, k);
// copy the jagged array into the matrix element by element
for (int i = 0; i < y.Length; i++) {
    for (int j = 0; j < y[i].Length; j++) {
        mX[i, j] = y[i][j];
    }
}

back to top

Python

#matrix.py
#using numpy

import numpy

n = 500

# using numpy arrays
mu = 10
sig = 1
values = numpy.random.normal(mu, sig, n)
x = numpy.column_stack((numpy.ones(n) ,values))
X = numpy.matrix(x)

# using python lists
u = []
for i in range(n):
u.append(numpy.random.normal(0,1))
U = numpy.matrix(u).T

B = numpy.matrix([[1.0], [0.5]])
Y = X * B + U
beta = (X.T * X).I * X.T * Y

print beta

back to top

Perl

#matrix.pm
use Math::Matrix;

$X = new Math::Matrix( [1.0, 2.0, 3.0], [4.0, 5.0, 6.0] );
$X -> print("matrix X:\n");

$Y = new Math::Matrix( [rand], [rand]);
$Y -> print ("matrix Y:\n");

$Xt = $X -> transpose;
$Z = $Xt -> multiply($Y);
$Z -> print("matrix X'Y:\n");

back to top

Matlab

%matrix.M
X = random('norm', 0, 1, 10, 4)
Xt = X'
XtX = Xt * X
XtXi = inv(XtX)

back to top

R

#matrix.R
# response: 100 draws as a one-column matrix
y0 <- rnorm(100)
yM <- matrix(y0, ncol=1)

# regressors: 300 draws arranged in 3 columns (100 rows)
x0 <- rnorm(300)
x <- matrix(x0, ncol=3)

XtX <- t(x)%*%x

library(MASS)
XtXi <- ginv(XtX)
# use the response matrix defined above
Xty <- t(x)%*%yM
beta <- XtXi%*%Xty

print(beta)

back to top

Julia

#matrix.jl
n = 100
k = 4
X = 100+10*randn(n,k)

e = randn(n)

b = [2.5, 3.5, 4.5, 5.5]

y = X*b + e

bb = inv(X'X)*X'y

back to top

SPSS

*matrix.sps;

MATRIX.

compute x = {1,2,3;4,5,6}.
compute y = {10, 10, 10}.
compute yT = T(y).
compute z = x * yT.
print x.
print y.
print yT.
print z.

END MATRIX.

back to top

SAS

*matrix.sas;
data reg;
do i=0 to 500;
x = 100 + 100.0 * normal(123456);
y = 2.5 + 5.0 * x + normal(987654);
output;
end;
run;

proc iml;
use reg var {x,y};

read all var {x} into x1;
x0 = J(nrow(x1), 1,1.0);
x = x0||x1;
read all var {y} into y;

close reg;

b = inv(x`*x)*x`*y;
reset print;
print b;
quit;

back to top

Scatter plot

Java

//scatterPlot.java
//using JFreeChart

String title = "Scatter plot";

org.jfree.data.xy.XYSeries series1 = new org.jfree.data.xy.XYSeries("series one");
org.jfree.data.xy.XYSeries series2 = new org.jfree.data.xy.XYSeries("series two");
for (int i = 0; i < 20; i++) {
series1.add(i + Math.random(), 10 * Math.random());
series2.add(10 * Math.random(), i + Math.random());
}

org.jfree.data.xy.XYSeriesCollection seriesCollection = new org.jfree.data.xy.XYSeriesCollection();
seriesCollection.addSeries(series1);
seriesCollection.addSeries(series2);

org.jfree.chart.JFreeChart chart = org.jfree.chart.ChartFactory.createScatterPlot(title, "xAxis", "yAxis",
seriesCollection, org.jfree.chart.plot.PlotOrientation.VERTICAL, true, true, true);

org.jfree.chart.ChartPanel panel = new org.jfree.chart.ChartPanel(chart, true);

javax.swing.JFrame frame = new javax.swing.JFrame(title);
frame.setDefaultCloseOperation(javax.swing.JFrame.EXIT_ON_CLOSE);
frame.add(panel);
frame.pack();
frame.setVisible(true);

back to top

Scala

//scatterPlot.scala
//using Breeze

import breeze.linalg._ // DenseVector and linspace
import breeze.plot._

val y = DenseVector.rand(20)
val x = linspace(1, 10, 20)

val fig = Figure("scatter plot")
val plt = fig.subplot(0)
// '+' draws a marker at each point instead of a connecting line
plt += plot(x, y, '+', colorcode="blue")

back to top

C#

//scatterPlot.cs
//using ZedGraph and Windows Forms

GraphPane myPane = zedGraphControl.GraphPane;
myPane.Title.Text = "title";

PointPairList list = new PointPairList();
Random rand = new Random();
for (double i = 0; i < 20; i++)
{
double x = i;
double y = rand.NextDouble() * 10.0;
list.Add(x, y);
}

LineItem curve = myPane.AddCurve("label", list, Color.Red);
curve.Line.IsVisible = false;

zedGraphControl.AxisChange();
Refresh();

back to top

Python

#scatterPlot.py
#using numpy and matplotlib

import numpy
import pylab

n = 20
x = numpy.arange(0.0, n)
y = numpy.random.normal(0,1,n)

pylab.plot(x, y, "o")
pylab.xlabel("x")
pylab.ylabel("y")
pylab.title("scatter plot")
pylab.grid(True)
pylab.savefig("scatter_plot")
pylab.show()

back to top

Matlab

%scatterPlot.M
x = rand(20,1)
y = rand(20,1)
scatter(x, y)

back to top

R

#scatterPlot.R
x <- rnorm(100)
plot(x)

back to top

Julia

#scatterPlot.jl
using Gadfly

plot(x=1:5, y=rand(5), color=rand(5), Geom.point)

back to top

SPSS

*scatterPlot.sps;

NEW FILE.

INPUT PROGRAM.

LOOP i = 1 to 10.
COMPUTE x1 = RV.NORMAL(0,1).
COMPUTE x2 = RV.UNIFORM(0.0, 1.0).
END CASE.
END LOOP.

END FILE.

END INPUT PROGRAM.

EXECUTE.
* Chart Builder.
GGRAPH
/GRAPHDATASET NAME="graphdataset" VARIABLES=x2 x1 MISSING=LISTWISE REPORTMISSING=NO
/GRAPHSPEC SOURCE=INLINE.
BEGIN GPL
SOURCE: s=userSource(id("graphdataset"))
DATA: x2=col(source(s), name("x2"))
DATA: x1=col(source(s), name("x1"))
GUIDE: axis(dim(1), label("x2"))
GUIDE: axis(dim(2), label("x1"))
ELEMENT: point(position(x2*x1))
END GPL.

back to top

SAS

*scatterPlot.sas;
data test;
do i=0 to 20;
x = 0.5 + uniform(-1);
output;
end;
run;

proc gplot data=test;
plot x * i ;
run;

back to top

Line chart

Java

//lineChart.java
//using JFreeChart

String title = "Line chart";

org.jfree.data.time.TimeSeries series1 = new org.jfree.data.time.TimeSeries("series one");
org.jfree.data.time.TimeSeries series2 = new org.jfree.data.time.TimeSeries("series two");
for (int i = 0; i < 20; i++) {
org.jfree.data.time.Day day = new org.jfree.data.time.Day(i+1, 9, 2010);
series1.add(day, 10 * Math.random());
series2.add(day, 20 * Math.random());
}

org.jfree.data.time.TimeSeriesCollection seriesCollection = new org.jfree.data.time.TimeSeriesCollection();
seriesCollection.addSeries(series1);
seriesCollection.addSeries(series2);

org.jfree.chart.JFreeChart chart = org.jfree.chart.ChartFactory.createTimeSeriesChart(title, "xAxis", "yAxis",
seriesCollection, true, true, true);

org.jfree.chart.ChartPanel panel = new org.jfree.chart.ChartPanel(chart, true);

javax.swing.JFrame frame = new javax.swing.JFrame(title);
frame.setDefaultCloseOperation(javax.swing.JFrame.EXIT_ON_CLOSE);
frame.add(panel);
frame.pack();
frame.setVisible(true);

back to top

Scala

//lineChart.scala
//using Breeze

import breeze.linalg._ // DenseVector and linspace
import breeze.plot._

val y = DenseVector.rand(20)
val x = linspace(1, 10, 20)

val fig = Figure("line chart")
val plt = fig.subplot(0)
plt += plot(x, y, '-', colorcode="blue")
// difference to scatter plot: - instead of +

back to top

C#

//lineChart.cs
//using ZedGraph and Windows Forms

GraphPane myPane = zedGraphControl.GraphPane;
myPane.Title.Text = "title";

PointPairList list = new PointPairList();
Random rand = new Random();
for (double i = 0; i < 20; i++)
{
double x = i;
double y = rand.NextDouble() * 10.0;
list.Add(x, y);
}

LineItem curve = myPane.AddCurve("label", list, Color.Red);

zedGraphControl.AxisChange();
Refresh();

back to top

Python

#lineChart.py
#using numpy and matplotlib

import numpy
import pylab

n = 20
x = numpy.arange(0.0, n)
y = numpy.random.normal(0,1,n)

pylab.plot(x, y)
pylab.xlabel("x")
pylab.ylabel("y")
pylab.title("line chart")
pylab.grid(True)
pylab.savefig("line_chart")
pylab.show()

back to top

Matlab

%lineChart.M
x = rand(20,1)
plot(x)

back to top

R

#lineChart.R
x <- rnorm(100)
plot(x, type="l")

back to top

Julia

#lineChart.jl
using Gadfly

plot(x=1:5, y=rand(5), color=rand(5), Geom.line)

back to top

SPSS

*lineChart.sps;

NEW FILE.

INPUT PROGRAM.

LOOP i = 1 to 10.
COMPUTE x1 = RV.NORMAL(0,1).
END CASE.
END LOOP.

END FILE.

END INPUT PROGRAM.

EXECUTE.

* Chart Builder.
GGRAPH
/GRAPHDATASET NAME="graphdataset" VARIABLES=i MEAN(x1)[name="MEAN_x1"] MISSING=LISTWISE REPORTMISSING=NO
/GRAPHSPEC SOURCE=INLINE.
BEGIN GPL
SOURCE: s=userSource(id("graphdataset"))
DATA: i=col(source(s), name("i"), unit.category())
DATA: MEAN_x1=col(source(s), name("MEAN_x1"))
GUIDE: axis(dim(1), label("i"))
GUIDE: axis(dim(2), label("Mean x1"))
SCALE: linear(dim(2), include(0))
ELEMENT: line(position(i*MEAN_x1), missing.wings())
END GPL.

back to top

SAS

*lineChart.sas;

/* 21 observations: i = 0..20, x = 0.5 plus a uniform random draw */
data test;
    do i=0 to 20;
        x = 0.5 + uniform(-1);
        output;
    end;
run;

/* join consecutive points so GPLOT draws a line instead of separate markers */
symbol interpol=join;

proc gplot data=test;
    plot x * i ;
run;

back to top

Bar chart

Java

//barChart.java
//using JFreeChart

// Declared here so the snippet is self-contained (used for both chart and window title).
String title = "Bar chart";

// Three series, each with one value in each of two categories.
org.jfree.data.category.DefaultCategoryDataset dataset = new org.jfree.data.category.DefaultCategoryDataset();
dataset.addValue(0.1, "series1", "category1");
dataset.addValue(0.5, "series2", "category1");
dataset.addValue(1.0, "series3", "category1");
dataset.addValue(0.3, "series1", "category2");
dataset.addValue(0.7, "series2", "category2");
dataset.addValue(1.2, "series3", "category2");

// Vertical bars; the three trailing flags switch on legend, tooltips and URL generation.
org.jfree.chart.JFreeChart chart = org.jfree.chart.ChartFactory.createBarChart(title, "Category", "Value",
        dataset, org.jfree.chart.plot.PlotOrientation.VERTICAL, true, true, true);
// Second argument: draw through an off-screen buffer.
org.jfree.chart.ChartPanel panel = new org.jfree.chart.ChartPanel(chart, true);

// Show the panel in a window that terminates the JVM when closed.
javax.swing.JFrame frame = new javax.swing.JFrame(title);
frame.setDefaultCloseOperation(javax.swing.JFrame.EXIT_ON_CLOSE);
frame.add(panel);
frame.pack();
frame.setVisible(true);

back to top

Scala

//barChart.scala
//could not find a bar chart in breeze.viz,
//but since Breeze uses JFreeChart under the hood anyway,
//the Java original should do:
barChart.java

back to top

C#

//barChart.cs
//using ZedGraph and Windows Forms

// Take the pane of the form's ZedGraph control and give it a title.
GraphPane pane = zedGraphControl.GraphPane;
pane.Title.Text = "title";

// 20 bars: x runs over 0..19, the bar height is a random value scaled by 10.
PointPairList points = new PointPairList();
Random rng = new Random();
double i = 0;
while (i < 20)
{
    points.Add(i, rng.NextDouble() * 10.0);
    i++;
}

// Add the points to the pane as red bars with the legend text "label".
BarItem bars = pane.AddBar("label", points, Color.Red);

// Let the control recompute its axis ranges, then repaint the form.
zedGraphControl.AxisChange();
Refresh();

back to top

Python

#barChart.py
#using numpy and matplotlib

import numpy
import pylab

# one uniformly distributed bar height per category 0 .. n-1
n = 10
x = numpy.random.uniform(low=0, high=1, size=n)
positions = range(n)

pylab.bar(positions, x)

# decorate the axes
pylab.title("bar chart")
pylab.xlabel("categories")
pylab.ylabel("values")
pylab.grid(True)

# write the figure to disk before opening the interactive window
pylab.savefig("bar_chart")
pylab.show()

back to top

Matlab

%barChart.M
% Draw 20 random values as bars, one bar per index.
x = rand(20,1);   % trailing semicolon keeps the vector from being echoed to the console
bar(x)

back to top

R

#barChart.R
# one bar per normally distributed value
x <- rnorm(n = 10)
barplot(height = x)

back to top

Julia

#barChart.jl
using Gadfly

# five random bar heights and five random values driving the bar colour
heights = rand(5)
shades = rand(5)

plot(x=1:5, y=heights, color=shades, Geom.bar)

back to top

SPSS

*barChart.sps;

* Start from an empty working file.
NEW FILE.

* Generate the data: 10 cases with i running from 1 to 10 and x1 drawn from RV.NORMAL(0,1).
INPUT PROGRAM.

LOOP i = 1 to 10.
COMPUTE x1 = RV.NORMAL(0,1).
END CASE.
END LOOP.

END FILE.

END INPUT PROGRAM.

EXECUTE.

* Chart Builder.
* The graph dataset holds MEAN(x1) per value of i and the GPL block draws it as an interval element over i.
GGRAPH
/GRAPHDATASET NAME="graphdataset" VARIABLES=i MEAN(x1)[name="MEAN_x1"] MISSING=LISTWISE REPORTMISSING=NO
/GRAPHSPEC SOURCE=INLINE.
BEGIN GPL
SOURCE: s=userSource(id("graphdataset"))
DATA: i=col(source(s), name("i"), unit.category())
DATA: MEAN_x1=col(source(s), name("MEAN_x1"))
GUIDE: axis(dim(1), label("i"))
GUIDE: axis(dim(2), label("Mean x1"))
SCALE: linear(dim(2), include(0))
ELEMENT: interval(position(i*MEAN_x1), shape.interior(shape.square))
END GPL.

back to top

SAS

*barChart.sas;

/* six observations: i = 0..5, x = 0.5 plus a uniform random draw */
data test;
    do i = 0 to 5;
        x = 0.5 + uniform(-1);
        output;
    end;
run;

/* one vertical bar per distinct i, bar height = sum of x */
proc gchart data = test;
    vbar i / discrete sumvar = x;
run;

back to top

Pie chart

Java

//pieChart.java
//using JFreeChart

final String title = "Pie chart";

// Three slices keyed "A", "B" and "C".
final org.jfree.data.general.DefaultPieDataset slices = new org.jfree.data.general.DefaultPieDataset();
slices.setValue("A", 0.35);
slices.setValue("B", 0.25);
slices.setValue("C", 0.4);

// The three trailing flags switch on legend, tooltips and URL generation.
final org.jfree.chart.JFreeChart pie =
        org.jfree.chart.ChartFactory.createPieChart(title, slices, true, true, true);

// Wrap the chart in a panel (second argument: draw through an off-screen buffer).
final org.jfree.chart.ChartPanel chartPanel = new org.jfree.chart.ChartPanel(pie, true);

// Show the panel in a window that terminates the JVM when closed.
final javax.swing.JFrame window = new javax.swing.JFrame(title);
window.setDefaultCloseOperation(javax.swing.JFrame.EXIT_ON_CLOSE);
window.add(chartPanel);
window.pack();
window.setVisible(true);

back to top

Scala

//pieChart.scala
//could not find a pie chart in breeze.viz,
//but since Breeze uses JFreeChart under the hood anyway,
//the Java original should do:
pieChart.java

back to top

C#

//pieChart.cs
//using ZedGraph and Windows Forms

// Take the pane of the form's ZedGraph control and give it a title.
GraphPane pane = zedGraphControl.GraphPane;
pane.Title.Text = "title";

// One call per slice: value, fill colour, displacement, legend label.
PieItem segment1 = pane.AddPieSlice(100, Color.Red, 0, "segment 1");
PieItem segment2 = pane.AddPieSlice(150, Color.Navy, 0, "segment 2");
PieItem segment3 = pane.AddPieSlice(25, Color.Yellow, 0, "segment 3");
PieItem segment4 = pane.AddPieSlice(75, Color.DarkGreen, 0, "segment 4");

// Repaint the form so the slices show up.
Refresh();

back to top

Python

#pieChart.py
#using numpy and matplotlib

import numpy
import pylab

# draw n random weights and rescale them so the shares add up to 1
n = 5
x = numpy.random.uniform(0, 1, n)
s = sum(x)
y = [x[i] / s for i in range(n)]
labs = ["cat " + str(i) for i in range(n)]

pylab.pie(y, labels=labs)
pylab.title("pie chart")
pylab.grid(True)

# write the figure to disk before opening the interactive window
pylab.savefig("pie_chart")
pylab.show()

back to top

Matlab

%pieChart.M
% Draw 5 random values as the slices of a pie chart.
x = rand(5,1);   % trailing semicolon keeps the vector from being echoed to the console
pie(x)

back to top

R

#pieChart.R
# four slices with relative sizes 1, 2, 3 and 4
x <- as.numeric(1:4)
pie(x)

back to top

SPSS

*pieChart.sps;

NEW FILE.

INPUT PROGRAM.

* Ten cases with i running from 1 to 10 and a non-negative x1, so that every slice has a valid size.
LOOP i = 1 to 10.
COMPUTE x1 = RV.UNIFORM(0,1).
END CASE.
END LOOP.

END FILE.

END INPUT PROGRAM.

EXECUTE.

* Chart Builder.
* Polar coordinates plus a stacked interval coloured by i turn the bars into pie slices.
GGRAPH
/GRAPHDATASET NAME="graphdataset" VARIABLES=i MEAN(x1)[name="MEAN_x1"] MISSING=LISTWISE REPORTMISSING=NO
/GRAPHSPEC SOURCE=INLINE.
BEGIN GPL
SOURCE: s=userSource(id("graphdataset"))
DATA: i=col(source(s), name("i"), unit.category())
DATA: MEAN_x1=col(source(s), name("MEAN_x1"))
COORD: polar.theta(startAngle(0))
GUIDE: axis(dim(1), null())
GUIDE: legend(aesthetic(aesthetic.color.interior), label("i"))
SCALE: linear(dim(1), dataMinimum(), dataMaximum())
ELEMENT: interval.stack(position(summary.percent(MEAN_x1)), color.interior(i))
END GPL.

back to top

SAS

*pieChart.sas;

/* six observations: i = 0..5, x = 0.5 plus a uniform random draw */
data test;
    do i = 0 to 5;
        x = 0.5 + uniform(-1);
        output;
    end;
run;

/* one slice per distinct i, slice size = sum of x */
proc gchart data = test;
    pie i / discrete sumvar = x;
run;

back to top