The design of advanced quantitative models and the implementation of complex data mining systems might be at the centre of my daily work. Even so, a considerable part of my time is actually spent crunching data: shuffling, sorting and scanning through data (which sometimes is no more than eye-balling it) in order to be able to understand and use it. Greg Wilson from the University of Toronto wrote a great book on Data Crunching, in which most of the basics are thoroughly explained and presented in a concise manner. However, most of his code examples are in either Java or Python. I really enjoy working with those two languages, but every now and then I need to use some of the other languages and tools common to our trade. Therefore I've put together, as a reference for myself and others, a small collection of basic data crunching routines in some common languages and tools:
Java
//writeFile.java
// Writes two lines to a text file. All writer calls live inside the try block
// so that the writer variable is in scope and an IOException is reported.
try {
    java.io.FileWriter writer = new java.io.FileWriter("C:/data/newFile.txt");
    writer.write("line_1 \r\n");
    writer.write("line_2 \r\n");
    writer.flush();
    writer.close();
} catch (java.io.IOException e) {
    e.printStackTrace();
}
Scala
//writeFile.scala
//just using the java way of doing things
// java.io names are fully qualified so the snippet needs no extra imports.
val file = new java.io.File("C:/data/newFile.txt")
val bw = new java.io.BufferedWriter(new java.io.FileWriter(file))
try {
  bw.write("line 1 \n")
  bw.write("line 2 \n")
} finally {
  bw.close() // release the file handle even if a write fails
}
C#
//writeFile.cs
// The using statement disposes (and thereby closes) the writer even if a write throws.
using (System.IO.TextWriter writer = new System.IO.StreamWriter("C:/data/newFile.txt"))
{
    writer.WriteLine("line_1");
    writer.WriteLine("line_2");
    writer.Flush();
}
Python
#writeFile.py
# The with-block closes the file even if one of the writes raises.
with open("C:/data/newFile.txt", "w") as writer:
    writer.write("line_1 \n")
    writer.write("line_2 \n")
    writer.write("line_3 \n")
Perl
#writeFile.pm
# Three-argument open with a lexical handle; fail loudly instead of printing to an unopened handle.
open(my $new_file, '>', "C:/data/newFile.txt") or die "cannot open C:/data/newFile.txt: $!";
print $new_file "line 1 \n";
print $new_file "line 2 \n";
print $new_file "line 3 \n";
close($new_file) or die "cannot close C:/data/newFile.txt: $!";
Ruby
#writeFile.rb
# The block form of File.open closes the file automatically, even on an exception.
File.open("C:/data/newFile.txt", "w") do |writer|
  writer.print("line_1 \n")
  writer.print("line_2 \n")
  writer.print("line_3 \n")
end
Matlab
%writeCSV.M
% 2x3 matrix written as comma-separated values (dlmwrite's default delimiter).
% No trailing semicolon, so the matrix is echoed to the console.
mat = [1.0, 1.1, 1.2; ...
       1.3, 1.4, 1.5]
dlmwrite('./data/test.csv', mat)
R
#writeCSV.R
# Two numeric vectors become the columns of a data frame, which is exported as CSV.
v1 <- c(1.0, 1.1, 1.2)
v2 <- c(1.3, 1.4, 1.5)
df <- data.frame(col1 = v1,
                 col2 = v2)
write.csv(df, file = "C:/data/newFile.csv")
Julia
#writeFile.jl
# Builds two rows (one of integers, one of strings) and writes them as CSV lines.
a = Int64[]
push!(a,1)
append!(a,[2,3])
b = ["a", "b", "c"]
c = Array[]
push!(c,a)
push!(c,b)
file = open("C:/data/test.csv", "w")
for zeile in c
    # join places the separator between elements only; comparing each element
    # with zeile[end] would drop commas whenever a value repeats the last one.
    write(file, join(zeile, ","))
    write(file, "\r\n")
end
close(file)
SPSS
* writeCSV.sps.
* Reads three inline records in fixed columns and exports them as CSV.
* Comments must end with a period, otherwise the following command may be read as comment text.
DATA LIST
/one 1-5 (A) two 6-10.
BEGIN DATA
line1 0.01
line2 0.02
line3 0.03
END DATA.
SAVE TRANSLATE OUTFILE='C:\data\test.csv'
/TYPE=CSV
/CELLS=VALUES.
SAS
*writeCSV.sas;
* The inline data must follow CARDS inside the DATA step. The lone semicolon ends the data block;
data test;
    input one $ two;
    cards;
line1 0.01
line2 0.02
line3 0.03
line4 0.04
;
run;
proc export data=test outfile="C:\data\newFile.csv" dbms=csv replace;
run;
Java
//readFile.java
try {
java.io.BufferedReader reader = new java.io.BufferedReader(new java.io.FileReader("C:/data/newFile.txt"));} catch(java.io.IOException e) {
String line = reader.readLine();
while(line != null) {System.out.println(line);}
line = reader.readLine();
reader.close();e.printStackTrace();}
Scala
//readFile.scala
// Prints every line of the file in upper case; Source is fully qualified so no import is needed.
val textfile = scala.io.Source.fromFile("C:/data/newFile.txt")
try {
  for (line <- textfile.getLines()) {
    println(line.toUpperCase)
  }
} finally {
  textfile.close() // close the source even if reading fails
}
C#
//readFile.cs
// NOTE: a using directive must appear at the top of the source file, before any type declaration.
using System.IO;
// The using statement closes the reader even if a read throws.
using (System.IO.TextReader reader = new System.IO.StreamReader("C:/data/newFile.txt"))
{
    String line = reader.ReadLine();
    while (line != null)
    {
        Console.WriteLine(line);
        // advance inside the loop, otherwise the first line is printed forever
        line = reader.ReadLine();
    }
}
Python
#readFile.py
# Prints each line without its surrounding whitespace.
reader = open("C:/data/newFile.txt", "r")
for line in reader:
    # a single parenthesised argument prints identically under Python 2 and 3
    print(line.strip())
reader.close()
Perl
#readFile.pm
# Echoes the file line by line. The readline operator <$new_file> had been lost
# from the while condition, leaving an empty (invalid) loop condition.
open(my $new_file, '<', "C:/data/newFile.txt") or die "cannot open C:/data/newFile.txt: $!";
while (<$new_file>) {
    print $_;
}
close($new_file);
Ruby
#readFile.rb
# File.foreach opens the file, yields each line and closes it again, even on an exception.
File.foreach("C:/data/newFile.txt") { |line| print line }
Matlab
%readCSV.M
% Reads the delimited numeric file into a matrix; no trailing semicolon, so the matrix is echoed.
mat = dlmread('./data/test.csv')
R
#readCSV.R
# Loads the CSV file into a data frame.
df <- read.csv(file = "C:/data/newFile.csv")
Julia
#readFile.jl
# Reads the whole file into a string, then splits it into lines and comma-separated fields.
file = open(readall, "C:/data/test.csv")
lines = split(file, "\n")
for line in lines
    elements = split(line, ",")
    # the inner loop belongs inside the outer one, so every line's fields are printed
    for element in elements
        print(element)
    end
end
SPSS
* readCSV.sps.
* Reads a comma-delimited text file into a string and a numeric variable.
* Comments must end with a period, otherwise the following command may be read as comment text.
GET DATA
/TYPE = TXT
/DELIMITERS = ","
/FILE = 'C:\data\test.csv'
/VARIABLES = one A5 two F5.2 .
DATASET NAME DataSet1 WINDOW=FRONT.
SAS
*readCSV.sas;
* Imports the CSV file into data set test, replacing it if it already exists;
proc import datafile="C:\data\newFile.csv" out=test dbms=csv replace;
* take the variable names from the first row of the file;
getnames=yes;
run;
Java
//containers.java
// Primitive array of length 2: first slot 1, second slot the default 0.
int[] array = {1, 0};
// 2x2 matrix with only the top-left cell set.
double[][] matrix = {{1.0, 0.0}, {0.0, 0.0}};
// Growable list with index access.
java.util.ArrayList< String > arrayList = new java.util.ArrayList< String >();
arrayList.add("element_1");
String element = arrayList.get(0);
// Sorted map: firstKey() returns the smallest key.
java.util.TreeMap< Integer, String > treeMap = new java.util.TreeMap< Integer, String >();
treeMap.put(0, "element_0");
Integer firstKey = treeMap.firstKey();
String value = treeMap.get(firstKey);
Scala
//containers.scala
// Immutable List and Map basics: every operation returns a new collection.
val liste01 = List("red", "green", "blue", "yellow")
println(liste01)
println(liste01(0))
val liste02 = liste01 :+ "black"
println(liste02)
val liste03 = liste02.sorted
println(liste03)
val liste04 = liste03 map (_.toUpperCase)
println(liste04)
// Keys are German, values English. The third pair had been reversed and its umlaut mis-encoded.
val mappe = Map("blau" -> "blue", "rot" -> "red", "grün" -> "green")
for ((k, v) <- mappe) {
  println(s"ger: $k eng: $v")
}
C#
//containers.cs
// Requires System.Collections.Generic (List, Dictionary) and System.Linq (First).
int[] array = new int[2];
array[0] = 1;
double[,] matrix = new double[2,2];
matrix[0,0] = 1.0;
// The generic type argument and the variable name had been fused into "Listlist".
List< String > list = new List< String >();
list.Add("element_1");
String element = list[0];
Dictionary< int, String > dictionary = new Dictionary< int, String >();
dictionary[0] = "element_0";
// C# has no Integer type; the key type of this dictionary is int.
int firstKey = dictionary.Keys.First();
String value = dictionary[firstKey];
Python
#containers.py
#see random.py for numpy arrays
# "elements" instead of "list" so the builtin list type is not shadowed.
elements = []
elements.append("element")
element = elements[0]
print(element)
dictionary = {}
dictionary["key_1"] = "element_1"
# next(iter(...)) yields the first key on Python 2 and 3; keys()[0] works on Python 2 only.
key1 = next(iter(dictionary))
element1 = dictionary[key1]
print(element1)
Perl
#containers.pm
# Arrays hold ordered scalars; hashes map string keys to scalars.
@array = ("a", "b", 3, 4);
foreach (@array) {
    print "$_\n";
}
print "$array[2]\n";
%hash = ("key1" => "element1", "key2" => 2);
foreach $key (sort keys %hash) {
    $value = $hash{$key};
    # print inside the loop so that every pair is shown, not just stale values after it
    print "$key => $value\n";
}
Ruby
#containers.rb
# Array: ordered, mixed element types allowed.
array = ["a", "b", 3, 4]
array.each do |value|
  puts "value: #{value} \n"
end
# Hash: key/value pairs visited in insertion order.
hash = {"key1" => "element1", "key2" => 2}
hash.each_pair do |key, value|
  puts "key: #{key} value: #{value} \n"
end
Matlab
%containers.M
% A char array, a scalar and a 2x3 matrix, then collected as fields of one struct.
% No trailing semicolons, so every assignment is echoed.
str = 'hello world'
value = 3.1416
matrix = [1.0 1.1 1.2; 1.3 1.4 1.5]
% assigning to a field creates the struct (and the field) on first use
struc.key1 = str
struc.key2 = value
struc.key3 = matrix
R
#containers.R
# The basic R containers: vector, array, matrix, factor, list and data frame.
numericVector <- c(1.0, 2.0, 3, 4)
alphaNumericVector <- c("one", "two", "three", "two")
# 2x2x3x3 array; the four values are recycled to fill all 36 cells
multiDimArray <- array(numericVector, c(2,2,3,3))
aMatrix <- matrix(numericVector, c(2,2))
aFactor <- factor(alphaNumericVector)
aList <- list(first=numericVector, second=alphaNumericVector, third=aMatrix, fourth="fourth", fifth=5)
# R indexes from 1: numericVector[0:2] silently dropped index 0 and recycled the
# two remaining values. Use the whole vector so all three columns have four rows.
aDataFrame <- data.frame(a=numericVector, b=alphaNumericVector, c=aFactor)
Julia
#containers.jl
# typed, initially empty vectors that grow with push!
a = String[]
push!(a, "a")
b = Float64[]
push!(b, 2.5)
# dictionary with Int64 keys and String values
c = Dict{Int64, String}()
c[1] = "a"
SPSS
* containers.sps.
* Builds one case with a three-element vector (variables vec1 to vec3) and lists it.
* Comments must end with a period, otherwise the following command may be read as comment text.
NEW FILE.
INPUT PROGRAM.
VECTOR vec(3).
COMPUTE vec(1) = 1.
COMPUTE vec(2) = 2.
COMPUTE vec(3) = 3.
* END CASE writes the case before END FILE stops the program - without it no case is created.
END CASE.
END FILE.
END INPUT PROGRAM.
EXECUTE.
LIST.
SAS
*containers.sas;
* First step reads the inline records, second step loops over an array of its numeric variable;
data test;
    input one $ two;
    cards;
line1 0.01
line2 0.02
line3 0.03
line4 0.04
;
run;
data test1;
    set test;
    * the array, the loop and the drop belong inside the DATA step, before its run statement;
    array a two;
    do count = 1 to dim(a);
        three = 3 * a{count};
    end;
    drop count;
run;
Java
//forLoop.java
// Counting loop over 0..9.
for (int i = 0; i < 10; i++) {
    System.out.println("line_" + i);
}
// Enhanced for loop over a list pre-filled with two elements.
java.util.ArrayList< String > arrayList =
        new java.util.ArrayList< String >(java.util.Arrays.asList("element_1", "element_2"));
for (String element : arrayList) {
    System.out.println(element);
}
Scala
//forLoop.scala
// Loops over 1..10 and classifies each number with a pattern match.
// Both cases must sit inside the match block, which sits inside the loop body.
for (i <- 1 to 10) {
  i match {
    case 1 | 3 | 5 | 7 | 9 => println(s"$i is an odd number")
    case 2 | 4 | 6 | 8 | 10 => println(s"$i is an even number")
  }
}
C#
//forLoop.cs
// The for header had been swallowed by the comment line above, leaving a bare block.
for (int i = 0; i < 10; i++)
{
    Console.WriteLine("line_" + i);
}
List< String > list = new List< String >();
list.Add("element_1");
list.Add("element_2");
foreach (String element in list)
{
    Console.WriteLine(element);
}
Python
#forLoop.py
# "elements" instead of "list" so the builtin list type is not shadowed.
elements = []
elements.append("element_1")
elements.append("element_2")
# enumerate yields index and value together
for i, element in enumerate(elements):
    print("position " + str(i) + ": " + element)
for element in elements:
    print(element)
Perl
#forLoop.pm
# Statement-modifier form of the loop; $_ takes the values 0 to 4.
print "numer is: $_\n" for (0 .. 4);
Ruby
#forLoop.rb
# Visits every element in order and prints it on its own line.
array = ["a", "b", 3, 4]
array.each do |element|
  puts element
end
Matlab
%forLoop.M
% Loop header, body and end had been fused into one line.
for i = 0:10
    i*2   % no semicolon: the doubled value is echoed on every pass
end
R
#forLoop.R
# Prints the integers 1 to 10, one per line.
for (i in 1:10) {
  print(i)
}
Julia
#forLoop.jl
# The iterable and the first call had been fused into "aprint"; body and end need their own lines.
a = [1,2,3,4,8]
for element in a
    print("$element \n")
end
SPSS
* forLoop.sps.
* Generates ten cases of a running sum of standard-normal draws, sorts and saves them.
* Comments must end with a period, otherwise the following command may be read as comment text.
NEW FILE.
INPUT PROGRAM.
LOOP i = 1 to 10.
COMPUTE x = x + RV.NORMAL(0,1).
LEAVE i.
LEAVE x.
END CASE.
END LOOP.
END FILE.
END INPUT PROGRAM.
EXECUTE.
DATASET NAME werte.
SORT CASES x.
SAVE OUTFILE = "C:\data\test.sav".
SAS
*forLoop.sas;
* output must run inside the loop, and each step ends with its own run statement;
data test;
    do i=0 to 10;
        output;
    end;
run;
data test1;
    set test;
    array a i;
    do count = 1 to dim(a);
        b = 3*a{count};
    end;
    drop count;
run;
Java
//ooConcepts.java
// A base class, an interface, and a subclass that overrides one method and implements the other.
public class ClassOne {
    public void methodOne() {
        System.out.println("I am method one from class one");
    }
}

public interface InterfaceOne {
    public void methodTwo();
}

public class ClassTwo extends ClassOne implements InterfaceOne {
    @Override
    public void methodOne() {
        System.out.println("I am method one from class two");
    }

    @Override
    public void methodTwo() {
        System.out.println("I am method two from class two");
    }
}

// usage (place inside a method, e.g. main)
ClassOne one = new ClassOne();
one.methodOne();
ClassTwo two = new ClassTwo();
two.methodOne();
two.methodTwo();
Scala
//ooConcepts.scala
// Scala being a functional language takes
// Types and Traits much more seriously than classes
object scalaClass {
  /** Prints a fixed message. Explicit `: Unit =` replaces the deprecated procedure syntax. */
  def scalaMethod(): Unit = {
    println("I am a method")
  }

  /** Entry point; must live inside the object to be runnable. */
  def main(args: Array[String]): Unit = {
    scalaMethod()
  }
}
C#
//ooConcepts.cs
// A base class, an interface, and a subclass that hides one method and implements the other.
public class ClassOne
{
    public void methodOne()
    {
        Console.WriteLine("I am method one from class one");
    }
}

interface InterfaceOne
{
    void methodTwo();
}

public class ClassTwo : ClassOne, InterfaceOne
{
    // "new" makes the hiding of ClassOne.methodOne explicit (the base method is not virtual)
    public new void methodOne()
    {
        Console.WriteLine("I am method one from class two");
    }

    public void methodTwo()
    {
        Console.WriteLine("I am method two from class two");
    }
}

// usage (place inside a method, e.g. Main)
ClassOne one = new ClassOne();
one.methodOne();
ClassTwo two = new ClassTwo();
two.methodOne();
two.methodTwo();
Python
#ooConcepts.py
class classOne:
    """Base class with a single method."""
    def methodOne(self):
        print("I am method one from class one")

# The base class must be classOne; the original named an undefined "one".
class classTwo(classOne):
    """Subclass: inherits methodOne and adds methodTwo."""
    def methodTwo(self, comment):
        print("I am method two from class two")
        print(comment)

c2 = classTwo()
c2.methodOne()
c2.methodTwo("I am a comment")
Perl
#ooConcepts.pm
# Returns the product of its two arguments.
sub mult {
    # $x/$y rather than $a/$b, which are reserved for sort blocks.
    # List assignment replaces the one-element slices @_[0] and @_[1].
    my ($x, $y) = @_;
    my $c = $x * $y;
    return $c;
}
$one = 4;
$two = 5;
$three = &mult($one, $two);
print "value: $three";
Ruby
#ooConcepts.rb
# Base class with a single method.
class ClassOne
  def methodOne
    puts "I am method one from class one"
  end
end

# Subclass: inherits methodOne and adds methodTwo, which stores and prints its argument.
class ClassTwo < ClassOne
  def methodTwo(comment)
    @localComment = comment
    puts "I am method two from class two"
    puts @localComment
  end
end

c1 = ClassOne.new
c1.methodOne
c2 = ClassTwo.new
c2.methodOne
c2.methodTwo("I am a comment")
Matlab
%ooConcepts.M
% r = ooConcepts(a, b) returns a + 2*b.
function r = ooConcepts(a, b)
    r = a + 2*b   % no semicolon: the result is echoed
end
% call from the command window or from another script:
result = ooConcepts(4,5)
R
#ooConcepts.R
#run a script: source("scriptName")
#clear workspace: rm(list=ls())
# Adds up the elements of a numeric vector; note that this masks base::sum.
sum <- function(vector) {
  s <- 0
  for (i in seq(along = vector)) {
    s <- s + vector[i]
  }
  # return after the loop; returning first would always give 0
  return(s)
}
a<-sum(c(1,2,3,4))
print(a)
Julia
#ooConcepts.jl
#to run a script:
#julia> include("C:/script.jl")
#simple function: adds b to an accumulator a times
function f1(a, b)
    c = 0
    for i in 1:a
        c += b
    end
    c   # the last expression is the return value
end
print(f1(10,3))
print("\n")
#first class function: returns a closure that raises its argument to the power x
function createPower(x)
    power = function (y)
        return y^x
    end
    return power
end
square = createPower(2)
print(string("4^2: ", square(4), "\n"))
cube = createPower(3)
print(string("4^3: ", cube(4)), "\n")
#type
# pre-1.0 syntax, as used throughout this page; Julia 1.x writes
# "abstract type Bicycle end" and "struct Roadbike <: Bicycle ... end"
abstract Bicycle
type Roadbike <: Bicycle
    number_of_gears::Int64
end
myBike = Roadbike(24)
SPSS
* ooConcepts.sps.
* Two macros: one adds its two arguments, one loops from 1 to its argument.
* Comments must end with a period, otherwise the following command may be read as comment text.
DEFINE !macro1(a=!CHAREND(',') /b=!CMDEND).
COMPUTE var1 = !a + !b.
END CASE.
!ENDDEFINE.
DEFINE !macro2(a=!TOKENS(1)).
!DO !i = 1 !TO !a.
COMPUTE var2 = !i+5.
END CASE.
!DOEND.
!ENDDEFINE.
NEW FILE.
INPUT PROGRAM.
!macro1 a=4,b=5.
!macro2 a=4.
END FILE.
END INPUT PROGRAM.
EXECUTE.
SAS
*ooConcepts.sas;
* mult: DATA step storing the product of its two arguments. %mend must close the macro after its body;
%macro mult(a, b);
    data results;
        c = &a * &b;
    run;
%mend;
* loop: writes 1..a to the log. %end closes the loop before %mend closes the macro;
%macro loop(a);
    %do i = 1 %to &a;
        %put &i;
    %end;
%mend;
data test;%mult(4,5);run;
%loop(3);
Java
//database.java
//using SQLiteJDBC
// Creates a table, inserts three rows in one batch and prints them.
try {
    Class.forName("org.sqlite.JDBC");
} catch (ClassNotFoundException e) {
    e.printStackTrace();
}
try {
    java.sql.Connection conn = java.sql.DriverManager.getConnection("jdbc:sqlite:test.db");
    java.sql.Statement stat = conn.createStatement();
    stat.executeUpdate("drop table if exists people;");
    stat.executeUpdate("create table people (name, year_of_birth);");
    java.sql.PreparedStatement prepStat = conn.prepareStatement("insert into people values (?,?);");
    prepStat.setString(1,"Goethe");
    prepStat.setInt(2, 1749);
    prepStat.addBatch();
    prepStat.setString(1,"Schiller");
    prepStat.setInt(2, 1759);
    prepStat.addBatch();
    prepStat.setString(1,"Napoleon");
    prepStat.setInt(2, 1769);
    prepStat.addBatch();
    // run the whole batch as one transaction
    conn.setAutoCommit(false);
    prepStat.executeBatch();
    conn.setAutoCommit(true);
    java.sql.ResultSet rs = stat.executeQuery("select * from people;");
    while (rs.next()) {
        System.out.println("name = " + rs.getString("name"));
        // both columns are read while the cursor is on the row
        System.out.println("year of birth = " + rs.getInt("year_of_birth"));
    }
    rs.close();
    conn.close();
} catch (java.sql.SQLException e) {
    e.printStackTrace();
}
Scala
//database.scala
//SLICK provides a functional interface for SQL
// but I am actually quite happy with JDBC
C#
//database.cs
//using MS-ACCESS
// Runs a query against the Access file and returns the first result table.
private System.Data.DataTable doQuery(String dbFileName, String sql)
{
    System.Data.Common.DbProviderFactory factory = System.Data.Common.DbProviderFactories.GetFactory("System.Data.OleDb");
    System.Data.Common.DbConnection connection = factory.CreateConnection();
    connection.ConnectionString = getMSAccessConnectionString(dbFileName);
    System.Data.Common.DbCommand command = factory.CreateCommand();
    command.CommandText = sql;
    command.Connection = connection;
    System.Data.Common.DbDataAdapter adapter = factory.CreateDataAdapter();
    adapter.SelectCommand = command;
    System.Data.DataSet dataset = new System.Data.DataSet();
    adapter.Fill(dataset);
    return dataset.Tables[0];
}

// Executes a non-query statement (DDL, INSERT, ...) against the Access file.
private void doUpdate(String dbFileName, String sql)
{
    System.Data.Common.DbProviderFactory factory = System.Data.Common.DbProviderFactories.GetFactory("System.Data.OleDb");
    System.Data.Common.DbConnection connection = factory.CreateConnection();
    connection.ConnectionString = getMSAccessConnectionString(dbFileName);
    System.Data.Common.DbCommand command = factory.CreateCommand();
    command.CommandText = sql;
    command.Connection = connection;
    connection.Open();
    try
    {
        command.ExecuteNonQuery();
    }
    finally
    {
        connection.Close(); // close the connection even if the statement fails
    }
}

// Builds the Jet OLE DB connection string for the given .mdb file.
private String getMSAccessConnectionString(String fileName)
{
    String connectionString = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + fileName + ";Jet OLEDB:Engine Type=5";
    return connectionString;
}

// usage (place inside a method of the same class)
String sqlCreate = "CREATE TABLE people (name text, year_of_birth int)";
doUpdate("C:/data/test.mdb", sqlCreate);
String sqlGoethe = "INSERT INTO people (name, year_of_birth) VALUES ('Goethe', 1749)";
doUpdate("C:/data/test.mdb", sqlGoethe);
String sqlSchiller = "INSERT INTO people (name, year_of_birth) VALUES ('Schiller', 1759)";
doUpdate("C:/data/test.mdb", sqlSchiller);
String sqlNapoleon = "INSERT INTO people (name, year_of_birth) VALUES ('Napoleon', 1769)";
doUpdate("C:/data/test.mdb", sqlNapoleon);
String sqlQuery = "SELECT * FROM people";
System.Data.DataTable table = doQuery("C:/data/test.mdb", sqlQuery);
foreach (System.Data.DataRow row in table.Rows)
{
    String name = (String)row[0];
    int yearOfBirth = (int)row[1];
    Console.WriteLine("name: " + name + " year of birth: " + yearOfBirth);
}
Python
#database.py
#using pysqlite
from pysqlite2 import dbapi2 as sqlite
con = sqlite.connect("mydb")
cursor = con.cursor()
cursor.execute("create table people (name varchar(20),year_of_birth integer);")
cursor.execute("insert into people (name, year_of_birth) values ('Goethe', 1749);")
cursor.execute("insert into people (name, year_of_birth) values ('Schiller', 1759);")
cursor.execute("insert into people (name, year_of_birth) values ('Napoleon', 1769);")
con.commit()
SELECT = "select * from people"
cursor.execute(SELECT)
print cursor.fetchall()
cursor.execute(SELECT)
for row in cursor:print "name: ", row[0], " year of birth: ", row[1]
Perl
#database.pm
#using DBD_SQLite
use DBI;
$sql1 = "drop table if exists people";
$sql2 = "create table people (name, year_of_birth)";
$sql3 = "insert into people values ('Goethe', 1749)";
$sql4 = "insert into people values ('Schiller', 1759)";
$sql5 = "insert into people values ('Napoleon', 1769)";
@sqls = ($sql1, $sql2, $sql3, $sql4, $sql5);
$dbh = DBI->connect( "dbi:SQLite:dbname=TEST_DB", "", "" );
# each statement is prepared, executed and finished inside the loop;
# executing after the loop would run only the last statement
foreach (@sqls) {
    $sth_in = $dbh->prepare($_);
    $sth_in->execute();
    $sth_in->finish();
}
$sql_read = "select name, year_of_birth from people";
$sth_out = $dbh->prepare($sql_read);
$sth_out->execute();
$sth_out->bind_columns( \$name, \$year_of_birth );
while ( $sth_out->fetch() ) {
    print "name $name, year $year_of_birth \n";
}
$sth_out->finish();
$dbh->disconnect();
R
#database.R
# Writes a data frame to SQLite and reads it back.
# Names are ordered to match the years (Goethe 1749, Schiller 1759, Napoleon 1769),
# as in the other database examples; the first two had been swapped.
names <- c("Goethe", "Schiller", "Napoleon")
years <- c(1749, 1759, 1769)
f1 <- data.frame(names=names, years=years)
library("RSQLite")
con <- dbConnect(dbDriver("SQLite"), dbname="mydb")
#dbSendQuery(con, "drop table years_of_birth")
dbWriteTable(con, "years_of_birth", f1)
t <- dbListTables(con)
rs <- dbSendQuery(con, "select * from years_of_birth")
f2 <- fetch(rs)
dbClearResult(rs)
dbDisconnect(con)
Julia
#database.jl
using ODBC
# NOTE(review): credentials are hard-coded here - replace them before real use
ODBC.connect("SPSSDATA", usr="db2admin", pwd="db2admin")
# result of the query is kept in dataFrame
dataFrame = query("select * from BETRUG")
disconnect()
SPSS
* database.sps.
* Reads three columns from a DB2 table via ODBC.
* Comments must end with a period, otherwise the following command may be read as comment text.
* Connection-string attributes are key=value pairs - UID and PWD were missing their equals sign.
GET DATA
/TYPE=ODBC
/CONNECT='DSN=SPSSDATA;UID=db2admin;PWD=db2admin;DBALIAS=SPSSDATA;'
/SQL='SELECT "X1", "X2", "X3" FROM "DB2ADMIN"."CLUSTERING"'
/ASSUMEDSTRWIDTH=255
/UNENCRYPTED.
CACHE.
EXECUTE.
DATASET NAME DataSet1 WINDOW=FRONT.
SAS
*database.sas;
*using sqlite and sqliteodbc.exe;
*datasource=sqliteDataSource;
*database=c:/data/sqliteDataBase.db3;
libname datBase odbc dsn=sqliteDataSource;
* one PROC SQL session runs all statements in order: create, three inserts, copy into WORK;
proc sql;
    create table datBase.people (name varchar(20),year_of_birth integer);
    insert into datBase.people (name, year_of_birth) values ('Goethe', 1749);
    insert into datBase.people (name, year_of_birth) values ('Schiller', 1759);
    insert into datBase.people (name, year_of_birth) values ('Napoleon', 1769);
    create table test as select * from datBase.people;
quit;
Java
//random.java
// Fills an n x k matrix with standard-normal draws.
int n = 10;
int k = 3;
double[][] x = new double[n][k];
java.util.Random rand = new java.util.Random();
for (int i = 0; i < n; i++) {
    for (int j = 0; j < k; j++) {
        // the assignment belongs inside the inner loop, where j is in scope
        x[i][j] = rand.nextGaussian();
    }
}
Scala
//random.scala
//using Breeze
import breeze.linalg._
// Gaussian constructed with arguments 0 and 1
val norm = breeze.stats.distributions.Gaussian(0,1)
// draw 20 samples from it and print them
val werte = norm.sample(20)
print(werte)
C#
//random.cs
// Fills an n x k matrix with uniform draws from [0, 1) (NextDouble is not Gaussian).
int n = 10;
int k = 3;
double[,] x = new double[n,k];
Random rand = new Random();
for (int i = 0; i < n; i++)
{
    for (int j = 0; j < k; j++)
    {
        // the assignment belongs inside the inner loop, where j is in scope
        x[i,j] = rand.NextDouble();
    }
}
Python
#random.py
import numpy
mu = 0
sig = 1
n = 10
x = numpy.random.normal(mu, sig, n)
y = numpy.ones(n) + 0.5 * x + numpy.random.normal(0, 1, n)
print y
Perl
#random.pm
use Math::Random;
$mu = 0;
$sig = 1;
# random_normal takes (count, mean, sd); with only ($mu, $sig) the mean would be
# read as the count and the sd as the mean.
$value = random_normal(1, $mu, $sig);
print "value $value";
Ruby
#random.rb
# n x k nested array of uniform random numbers; the statements had been fused onto one line.
n = 8
k = 3
x = Array.new(n) { Array.new(k) { rand } }
Matlab
%random.M
% n x k matrix of normal draws with mean mu and standard deviation sig.
% The two assignments on the first line had been fused; each needs its own statement.
mu = 0
sig = 1
n=10
k=1
vals = random('norm', mu, sig, n, k)
R
#random.R
n <- 10
x <- 100 * rnorm(n)
u <- rnorm(n)
# vectorised form of the element-wise loop y[i] = 1 + 0.5*x[i] + u[i]
y <- 1 + 0.5 * x + u
Julia
#random.jl
using Distributions
# LogNormal with its default parameters
logNorm = LogNormal()
# 100 draws, then their sample skewness and kurtosis
values = rand(logNorm, 100)
skewness(values)
kurtosis(values)
SPSS
* random.sps.
* Generates ten cases with a normal, a uniform and a Poisson variate and saves them.
* Comments must end with a period, otherwise the following command may be read as comment text.
NEW FILE.
INPUT PROGRAM.
LOOP i = 1 to 10.
COMPUTE x1 = RV.NORMAL(0,1).
COMPUTE x2 = RV.UNIFORM(0.0, 1.0).
COMPUTE x3 = RV.POISSON(0.5).
END CASE.
END LOOP.
END FILE.
END INPUT PROGRAM.
EXECUTE.
DATASET NAME werte.
SAVE OUTFILE = "C:\data\test.sav".
SAS
*random.sas;
* All draws and the output statement belong inside the loop, before end and run;
data test;
    do i=0 to 500;
        x = normal(-1);
        x1 = rannor(123);
        y = uniform(-1);
        y1 = ranuni(123);
        output;
    end;
run;
Java
//matrix.java
//using JAMA: Java Matrix Package
// Builds a random n x k matrix X and prints the inverse of X'X row by row.
int n = 10;
int k = 3;
double[][] x = new double[n][k];
java.util.Random rand = new java.util.Random();
for (int i = 0; i < n; i++) {
    for (int j = 0; j < k; j++) {
        x[i][j] = rand.nextGaussian();
    }
}
Jama.Matrix X = new Jama.Matrix(x);
Jama.Matrix XtX = X.transpose().times(X);
Jama.Matrix XtXi = XtX.inverse();
for (int i = 0; i < XtXi.getRowDimension(); i++) {
    for (int j = 0; j < XtXi.getColumnDimension(); j++) {
        System.out.print(" " + XtXi.get(i,j));
    }
    // line break after each row
    System.out.println("");
}
Scala
//matrix.scala
// using Breeze
import breeze.linalg._
// Tall design matrix (4 observations, 3 regressors): a 3 x 4 matrix would make
// X'X a 4 x 4 matrix of rank at most 3, which cannot be inverted.
val x = DenseMatrix.rand(4,3)
val y = DenseVector.ones[Double](4)
val xtx = x.t * x
val xtxi = inv(xtx)
// ordinary least squares: beta = (X'X)^-1 X'y
val beta = xtxi * (x.t * y)
println("beta: "+beta)
C#
//matrix.cs
// using dnAnalytics
// Copies a jagged array y (n rows, k columns; n, k and y are defined by the caller)
// into a dnAnalytics matrix. The code had been swallowed by the comment line.
dnAnalytics.LinearAlgebra.Matrix mX = dnAnalytics.LinearAlgebra.MatrixBuilder.CreateMatrix(n, k);
for (int i = 0; i < y.Length; i++)
{
    for (int j = 0; j < y[i].Length; j++)
    {
        mX[i, j] = y[i][j];
    }
}
Python
#matrix.py
#using numpy
import numpy
n = 500
# using numpy arrays
mu = 10
sig = 1
values = numpy.random.normal(mu, sig, n)
# design matrix: a column of ones (intercept) next to the regressor
x = numpy.column_stack((numpy.ones(n) ,values))
X = numpy.matrix(x)
# using python lists
u = []
for i in range(n):
    u.append(numpy.random.normal(0,1))
# built once, after the loop has collected all n disturbances
U = numpy.matrix(u).T
B = numpy.matrix([[1.0], [0.5]])
Y = X * B + U
# ordinary least squares: beta = (X'X)^-1 X'Y
beta = (X.T * X).I * X.T * Y
print(beta)
Perl
#matrix.pm
use Math::Matrix;
# 2x3 matrix X and a random 2x1 column Y
$X = Math::Matrix->new( [1.0, 2.0, 3.0], [4.0, 5.0, 6.0] );
$X->print("matrix X:\n");
$Y = Math::Matrix->new( [rand], [rand] );
$Y->print("matrix Y:\n");
# X'Y: the 3x2 transpose times the 2x1 column
$Xt = $X->transpose;
$Z = $Xt->multiply($Y);
$Z->print("matrix X'Y:\n");
Matlab
%matrix.M
% 10x4 matrix of standard-normal draws, its transpose, X'X and the inverse of X'X.
% No trailing semicolons, so every result is echoed.
X = random('norm', 0, 1, 10, 4)
Xt = X'
XtX = Xt * X
XtXi = inv(XtX)
R
#matrix.R
# Least-squares coefficients of 100 random responses on three random regressors.
y0 <- rnorm(100)
yM <- matrix(y0, ncol=1)
x0 <- rnorm(300)
x <- matrix(x0, ncol=3)
XtX <- t(x)%*%x
library(MASS)
XtXi <- ginv(XtX)
# the response column is yM; the original referenced an undefined y
Xty <- t(x)%*%yM
beta <- XtXi%*%Xty
print(beta)
Julia
#matrix.jl
n = 100
k = 4
# n x k regressor matrix, noise vector and coefficient vector
X = 100+10*randn(n,k)
e = randn(n)
b = [2.5, 3.5, 4.5, 5.5]
y = X*b + e
# least-squares estimate (X'X)^-1 X'y
bb = inv(X'X)*X'y
SPSS
* matrix.sps.
* Multiplies a 2x3 matrix by a transposed 1x3 row vector.
* Comments must end with a period, otherwise the following command may be read as comment text.
MATRIX.
compute x = {1,2,3;4,5,6}.
compute y = {10, 10, 10}.
compute yT = T(y).
compute z = x * yT.
print x.
print y.
print yT.
print z.
END MATRIX.
SAS
*matrix.sas;
* Simulates y = 2.5 + 5 x + noise, then estimates the coefficients with PROC IML;
data reg;
    do i=0 to 500;
        x = 100 + 100.0 * normal(123456);
        y = 2.5 + 5.0 * x + normal(987654);
        output;
    end;
run;
proc iml;
    use reg var {x,y};
    read all var {x} into x1;
    * prepend a column of ones for the intercept;
    x0 = J(nrow(x1), 1,1.0);
    x = x0||x1;
    read all var {y} into y;
    close reg;
    b = inv(x`*x)*x`*y;
    reset print;
    print b;
quit;
Java
//scatterPlot.java
//using JFreeChart
String title = "Scatter plot";
org.jfree.data.xy.XYSeries series1 = new org.jfree.data.xy.XYSeries("series one");
org.jfree.data.xy.XYSeries series2 = new org.jfree.data.xy.XYSeries("series two");
for (int i = 0; i < 20; i++) {
    series1.add(i + Math.random(), 10 * Math.random());
    // both series are filled inside the loop, where i is in scope
    series2.add(10 * Math.random(), i + Math.random());
}
org.jfree.data.xy.XYSeriesCollection seriesCollection = new org.jfree.data.xy.XYSeriesCollection();
seriesCollection.addSeries(series1);
seriesCollection.addSeries(series2);
org.jfree.chart.JFreeChart chart = org.jfree.chart.ChartFactory.createScatterPlot(title, "xAxis", "yAxis",
        seriesCollection, org.jfree.chart.plot.PlotOrientation.VERTICAL, true, true, true);
org.jfree.chart.ChartPanel panel = new org.jfree.chart.ChartPanel(chart, true);
javax.swing.JFrame frame = new javax.swing.JFrame(title);
frame.setDefaultCloseOperation(javax.swing.JFrame.EXIT_ON_CLOSE);
frame.add(panel);
frame.pack();
frame.setVisible(true);
Scala
//scatterPlot.scala
//using Breeze
// DenseVector and linspace come from breeze.linalg, so that import is needed as well
import breeze.linalg._
import breeze.plot._
val y = DenseVector.rand(20)
val x = linspace(1, 10, 20)
val fig = Figure("scatter plot")
val plt = fig.subplot(0)
// '+' draws unconnected markers
plt += plot(x, y, '+', colorcode="blue")
C#
//scatterPlot.cs
//using ZedGraph and Window Forms
GraphPane myPane = zedGraphControl.GraphPane;
myPane.Title.Text = "title";
PointPairList list = new PointPairList();
Random rand = new Random();
for (double i = 0; i < 20; i++)
{
    // all three statements belong inside the loop body, where x and i are in scope
    double x = i;
    double y = rand.NextDouble() * 10.0;
    list.Add(x, y);
}
LineItem curve = myPane.AddCurve("label", list, Color.Red);
// hide the connecting line so that only the point symbols remain
curve.Line.IsVisible = false;
zedGraphControl.AxisChange();
Refresh();
Python
#scatterPlot.py
#using numpy and matplotlib
import numpy
import pylab
n = 20
# x: 0.0 .. n-1, y: n normal draws
x = numpy.arange(0.0, n)
y = numpy.random.normal(0,1,n)
# "o" format string: circle markers
pylab.plot(x, y, "o")
pylab.xlabel("x")
pylab.ylabel("y")
pylab.title("scatter plot")
pylab.grid(True)
# save before show
pylab.savefig("scatter_plot")
pylab.show()
Matlab
%scatterPlot.M
% scatter plot of two 20x1 vectors of uniform random numbers
x = rand(20,1)
y = rand(20,1)
scatter(x, y)
R
#scatterPlot.R
# 100 normal draws; plot() with default arguments
x <- rnorm(100)
plot(x)
Julia
#scatterPlot.jl
using Gadfly
# five random points drawn with the point geometry, coloured by a second random vector
plot(x=1:5, y=rand(5), color=rand(5), Geom.point)
SPSS
* scatterPlot.sps.
* Generates ten cases and plots x1 against x2 as points.
* SPSS comments start with an asterisk and end with a period - a leading hash sign is not a comment.
NEW FILE.
INPUT PROGRAM.
LOOP i = 1 to 10.
COMPUTE x1 = RV.NORMAL(0,1).
COMPUTE x2 = RV.UNIFORM(0.0, 1.0).
END CASE.
END LOOP.
END FILE.
END INPUT PROGRAM.
EXECUTE.
* Chart Builder.
GGRAPH
/GRAPHDATASET NAME="graphdataset" VARIABLES=x2 x1 MISSING=LISTWISE REPORTMISSING=NO
/GRAPHSPEC SOURCE=INLINE.
BEGIN GPL
SOURCE: s=userSource(id("graphdataset"))
DATA: x2=col(source(s), name("x2"))
DATA: x1=col(source(s), name("x1"))
GUIDE: axis(dim(1), label("x2"))
GUIDE: axis(dim(2), label("x1"))
ELEMENT: point(position(x2*x1))
END GPL.
SAS
*scatterPlot.sas;
* SAS comments are written as asterisk ... semicolon. The draw and output belong inside the loop;
data test;
    do i=0 to 20;
        x = 0.5 + uniform(-1);
        output;
    end;
run;
proc gplot data=test;
    plot x * i;
run;
Java
//lineChart.java
//using JFreeChart
String title = "Line chart";
org.jfree.data.time.TimeSeries series1 = new org.jfree.data.time.TimeSeries("series one");
org.jfree.data.time.TimeSeries series2 = new org.jfree.data.time.TimeSeries("series two");
for (int i = 0; i < 20; i++) {
    // one observation per day, 1 to 20 September 2010
    org.jfree.data.time.Day day = new org.jfree.data.time.Day(i+1, 9, 2010);
    // both series are filled inside the loop, where day is in scope
    series1.add(day, 10 * Math.random());
    series2.add(day, 20 * Math.random());
}
org.jfree.data.time.TimeSeriesCollection seriesCollection = new org.jfree.data.time.TimeSeriesCollection();
seriesCollection.addSeries(series1);
seriesCollection.addSeries(series2);
org.jfree.chart.JFreeChart chart = org.jfree.chart.ChartFactory.createTimeSeriesChart(title, "xAxis", "yAxis",
        seriesCollection, true, true, true);
org.jfree.chart.ChartPanel panel = new org.jfree.chart.ChartPanel(chart, true);
javax.swing.JFrame frame = new javax.swing.JFrame(title);
frame.setDefaultCloseOperation(javax.swing.JFrame.EXIT_ON_CLOSE);
frame.add(panel);
frame.pack();
frame.setVisible(true);
Scala
//lineChart.scala
//using Breeze
// DenseVector and linspace come from breeze.linalg, so that import is needed as well
import breeze.linalg._
import breeze.plot._
val y = DenseVector.rand(20)
val x = linspace(1, 10, 20)
// window title corrected: it had been copied from the scatter plot example
val fig = Figure("line chart")
val plt = fig.subplot(0)
plt += plot(x, y, '-', colorcode="blue")
// difference to scatter plot: - instead of +
C#
//lineChart.cs
//using ZedGraph and Window Forms
GraphPane myPane = zedGraphControl.GraphPane;
myPane.Title.Text = "title";
PointPairList list = new PointPairList();
Random rand = new Random();
for (double i = 0; i < 20; i++)
{
    // all three statements belong inside the loop body, where x and i are in scope
    double x = i;
    double y = rand.NextDouble() * 10.0;
    list.Add(x, y);
}
LineItem curve = myPane.AddCurve("label", list, Color.Red);
zedGraphControl.AxisChange();
Refresh();
Python
#lineChart.py
#using numpy and matplotlib
import numpy
import pylab
n = 20
# x: 0.0 .. n-1, y: n normal draws
x = numpy.arange(0.0, n)
y = numpy.random.normal(0,1,n)
# no format string, unlike the "o" used in scatterPlot.py
pylab.plot(x, y)
pylab.xlabel("x")
pylab.ylabel("y")
pylab.title("line chart")
pylab.grid(True)
# save before show
pylab.savefig("line_chart")
pylab.show()
Matlab
%lineChart.M
% plots 20 uniform random numbers against their index
x = rand(20,1)
plot(x)
R
#lineChart.R
# 100 normal draws; type="l" joins them with lines
x <- rnorm(100)
plot(x, type="l")
Julia
#lineChart.jl
using Gadfly
# same data layout as the scatter plot, drawn with the line geometry
plot(x=1:5, y=rand(5), color=rand(5), Geom.line)
SPSS
* lineChart.sps.
* Generates ten cases and draws the mean of x1 per value of i as a line.
* SPSS comments start with an asterisk and end with a period - a leading hash sign is not a comment.
NEW FILE.
INPUT PROGRAM.
LOOP i = 1 to 10.
COMPUTE x1 = RV.NORMAL(0,1).
END CASE.
END LOOP.
END FILE.
END INPUT PROGRAM.
EXECUTE.
* Chart Builder.
GGRAPH
/GRAPHDATASET NAME="graphdataset" VARIABLES=i MEAN(x1)[name="MEAN_x1"] MISSING=LISTWISE REPORTMISSING=NO
/GRAPHSPEC SOURCE=INLINE.
BEGIN GPL
SOURCE: s=userSource(id("graphdataset"))
DATA: i=col(source(s), name("i"), unit.category())
DATA: MEAN_x1=col(source(s), name("MEAN_x1"))
GUIDE: axis(dim(1), label("i"))
GUIDE: axis(dim(2), label("Mean x1"))
SCALE: linear(dim(2), include(0))
ELEMENT: line(position(i*MEAN_x1), missing.wings())
END GPL.
SAS
*lineChart.sas;
* SAS comments are written as asterisk ... semicolon. The draw and output belong inside the loop;
data test;
    do i=0 to 20;
        x = 0.5 + uniform(-1);
        output;
    end;
run;
* join the points with straight lines;
symbol interpol=join;
proc gplot data=test;
    plot x * i;
run;
Java
//barChart.java
//using JFreeChart
// title is used for both the chart and the window but was never declared in this snippet
String title = "Bar chart";
org.jfree.data.category.DefaultCategoryDataset dataset = new org.jfree.data.category.DefaultCategoryDataset();
// addValue(value, series, category): three series in each of two categories
dataset.addValue(0.1, "series1", "category1");
dataset.addValue(0.5, "series2", "category1");
dataset.addValue(1.0, "series3", "category1");
dataset.addValue(0.3, "series1", "category2");
dataset.addValue(0.7, "series2", "category2");
dataset.addValue(1.2, "series3", "category2");
org.jfree.chart.JFreeChart chart = org.jfree.chart.ChartFactory.createBarChart(title, "Category", "Value",
        dataset, org.jfree.chart.plot.PlotOrientation.VERTICAL, true, true, true);
org.jfree.chart.ChartPanel panel = new org.jfree.chart.ChartPanel(chart, true);
javax.swing.JFrame frame = new javax.swing.JFrame(title);
frame.setDefaultCloseOperation(javax.swing.JFrame.EXIT_ON_CLOSE);
frame.add(panel);
frame.pack();
frame.setVisible(true);
Scala
//barChart.scala
//could not find a bar chart in breeze.viz
//but since Breeze is anyway using JFreeChart
//the original should do: barChart.java
C#
//barChart.cs
//using ZedGraph and Window Forms
GraphPane myPane = zedGraphControl.GraphPane;
myPane.Title.Text = "title";
PointPairList list = new PointPairList();
Random rand = new Random();
for (double i = 0; i < 20; i++)
{
    // all three statements belong inside the loop body, where x and i are in scope
    double x = i;
    double y = rand.NextDouble() * 10.0;
    list.Add(x, y);
}
BarItem bars = myPane.AddBar("label", list, Color.Red);
zedGraphControl.AxisChange();
Refresh();
Python
#barChart.py
#using numpy and matplotlib
import numpy
import pylab
n = 10
# n uniform draws, one bar per draw at positions 0 .. n-1
x = numpy.random.uniform(0, 1, n)
pylab.bar(range(n), x)
pylab.xlabel("categories")
pylab.ylabel("values")
pylab.title("bar chart")
pylab.grid(True)
# save before show
pylab.savefig("bar_chart")
pylab.show()
Matlab
%barChart.M
% one bar per element of a 20x1 vector of uniform random numbers
x = rand(20,1)
bar(x)
R
#barChart.R
# one bar per draw
x <- rnorm(10)
barplot(x)
Julia
#barChart.jl
using Gadfly
# same data layout as the scatter plot, drawn with the bar geometry
plot(x=1:5, y=rand(5), color=rand(5), Geom.bar)
SPSS
* barChart.sps.
* Generates ten cases and draws the mean of x1 per value of i as bars.
* Comments must end with a period, otherwise the following command may be read as comment text.
NEW FILE.
INPUT PROGRAM.
LOOP i = 1 to 10.
COMPUTE x1 = RV.NORMAL(0,1).
END CASE.
END LOOP.
END FILE.
END INPUT PROGRAM.
EXECUTE.
* Chart Builder.
GGRAPH
/GRAPHDATASET NAME="graphdataset" VARIABLES=i MEAN(x1)[name="MEAN_x1"] MISSING=LISTWISE REPORTMISSING=NO
/GRAPHSPEC SOURCE=INLINE.
BEGIN GPL
SOURCE: s=userSource(id("graphdataset"))
DATA: i=col(source(s), name("i"), unit.category())
DATA: MEAN_x1=col(source(s), name("MEAN_x1"))
GUIDE: axis(dim(1), label("i"))
GUIDE: axis(dim(2), label("Mean x1"))
SCALE: linear(dim(2), include(0))
ELEMENT: interval(position(i*MEAN_x1), shape.interior(shape.square))
END GPL.
SAS
*barChart.sas;
* The draw and output belong inside the loop, before end and run;
data test;
    do i=0 to 5;
        x = 0.5 + uniform(-1);
        output;
    end;
run;
proc gchart data=test;
    vbar i /discrete sumvar = x;
run;
Java
//pieChart.java
//using JFreeChart
String title = "Pie chart";
org.jfree.data.general.DefaultPieDataset dataset = new org.jfree.data.general.DefaultPieDataset();
// three slices; the values add up to 1.0
dataset.setValue("A", 0.35);
dataset.setValue("B", 0.25);
dataset.setValue("C", 0.4);
org.jfree.chart.JFreeChart chart = org.jfree.chart.ChartFactory.createPieChart(title, dataset, true, true, true);
// wrap the chart in a Swing panel and show it in its own window
org.jfree.chart.ChartPanel panel = new org.jfree.chart.ChartPanel(chart, true);
javax.swing.JFrame frame = new javax.swing.JFrame(title);
frame.setDefaultCloseOperation(javax.swing.JFrame.EXIT_ON_CLOSE);
frame.add(panel);
frame.pack();
frame.setVisible(true);
Scala
//pieChart.scala
//could not find a pie chart in breeze.viz
//but since Breeze is anyway using JFreeChart
//the original should do: pieChart.java
C#
//pieChart.cs
//using ZedGraph and Window Forms
GraphPane myPane = zedGraphControl.GraphPane;
myPane.Title.Text = "title";
// four slices, each added with a value, a colour, a third argument of 0 and a label
PieItem segment1 = myPane.AddPieSlice(100, Color.Red, 0, "segment 1");
PieItem segment2 = myPane.AddPieSlice(150, Color.Navy, 0, "segment 2");
PieItem segment3 = myPane.AddPieSlice(25, Color.Yellow, 0, "segment 3");
PieItem segment4 = myPane.AddPieSlice(75, Color.DarkGreen, 0, "segment 4");
Refresh();
Python
#pieChart.py
#using numpy and matplotlib
import numpy
import pylab
n = 5
x = numpy.random.uniform(0, 1, n)
s = sum(x)
y = []
labs = []
for i in range(n):
    # share of each draw in the total, plus a matching label
    y.append(x[i]/s)
    labs.append("cat " + str(i))
# draw once, after all shares and labels have been collected
pylab.pie(y, labels=labs)
pylab.title("pie chart")
pylab.grid(True)
pylab.savefig("pie_chart")
pylab.show()
Matlab
%pieChart.M
% one slice per element of a 5x1 vector of uniform random numbers
x = rand(5,1)
pie(x)
R
#pieChart.R
# one slice per element of x
x <- c(1, 2, 3, 4)
pie(x)
SPSS
* pieChart.sps.
* Comments must end with a period, otherwise the following command may be read as comment text.
* NOTE(review) - the GPL block below is identical to the one in barChart.sps, so it
  presumably renders bars and not a pie - confirm and adapt before use.
NEW FILE.
INPUT PROGRAM.
LOOP i = 1 to 10.
COMPUTE x1 = RV.NORMAL(0,1).
END CASE.
END LOOP.
END FILE.
END INPUT PROGRAM.
EXECUTE.
* Chart Builder.
GGRAPH
/GRAPHDATASET NAME="graphdataset" VARIABLES=i MEAN(x1)[name="MEAN_x1"] MISSING=LISTWISE REPORTMISSING=NO
/GRAPHSPEC SOURCE=INLINE.
BEGIN GPL
SOURCE: s=userSource(id("graphdataset"))
DATA: i=col(source(s), name("i"), unit.category())
DATA: MEAN_x1=col(source(s), name("MEAN_x1"))
GUIDE: axis(dim(1), label("i"))
GUIDE: axis(dim(2), label("Mean x1"))
SCALE: linear(dim(2), include(0))
ELEMENT: interval(position(i*MEAN_x1), shape.interior(shape.square))
END GPL.
SAS
*pieChart.sas;
* The draw and output belong inside the loop, before end and run;
data test;
    do i=0 to 5;
        x = 0.5 + uniform(-1);
        output;
    end;
run;
proc gchart data=test;
    pie i /discrete sumvar = x;
run;