Initial commit
commit
910b138668
|
|
@ -0,0 +1,3 @@
|
||||||
|
Subject/
|
||||||
|
target/
|
||||||
|
.idea/
|
||||||
|
|
@ -0,0 +1,25 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
|
||||||
|
<output url="file://$MODULE_DIR$/target/classes" />
|
||||||
|
<output-test url="file://$MODULE_DIR$/target/test-classes" />
|
||||||
|
<content url="file://$MODULE_DIR$">
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/Subject" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/lib" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/target" />
|
||||||
|
</content>
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
<orderEntry type="library" name="SPMF" level="project" />
|
||||||
|
<orderEntry type="library" name="Maven: org.jetbrains:annotations:15.0" level="project" />
|
||||||
|
<orderEntry type="library" name="Maven: com.opencsv:opencsv:4.0" level="project" />
|
||||||
|
<orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.6" level="project" />
|
||||||
|
<orderEntry type="library" name="Maven: org.apache.commons:commons-text:1.1" level="project" />
|
||||||
|
<orderEntry type="library" name="Maven: commons-beanutils:commons-beanutils:1.9.3" level="project" />
|
||||||
|
<orderEntry type="library" name="Maven: commons-logging:commons-logging:1.2" level="project" />
|
||||||
|
<orderEntry type="library" name="Maven: commons-collections:commons-collections:3.2.2" level="project" />
|
||||||
|
</component>
|
||||||
|
</module>
|
||||||
|
|
@ -0,0 +1,10 @@
|
||||||
|
# DMVHomework - Note to the teacher
|
||||||
|
|
||||||
|
Three data mining algorithms have been used for this homework: Apriori, LCM and BIDE+.
|
||||||
|
Each of these three algorithms has its own class, named after the algorithm, in the package *algorithm*.
|
||||||
|
Each class has a main method which is able to launch either one instance of the selected algorithm or an experiment on this algorithm.
|
||||||
|
|
||||||
|
In order to give inputs to those algorithms, the class *main.DatasetConverter* takes the raw dataset and converts it into the appropriate format for Apriori and LCM (*.transaction*) and for BIDE+ (*.sequence*).
|
||||||
|
For BIDE+, since the raw dataset contains named items, the formatted *.sequence* will have only numbers in it. A file with the same name and an extension *.seqinfo* gives the association between the name of the item and its ID.
|
||||||
|
|
||||||
|
To explore the patterns returned by those algorithms, several methods in the class *main.DataExplorer* help to choose meaningful patterns.
|
||||||
Binary file not shown.
|
|
@ -0,0 +1,35 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<groupId>fr.urao.dmv</groupId>
|
||||||
|
<artifactId>DMV Homework</artifactId>
|
||||||
|
<version>1</version>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.jetbrains</groupId>
|
||||||
|
<artifactId>annotations</artifactId>
|
||||||
|
<version>RELEASE</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.opencsv</groupId>
|
||||||
|
<artifactId>opencsv</artifactId>
|
||||||
|
<version>4.0</version>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<artifactId>maven-compiler-plugin</artifactId>
|
||||||
|
<configuration>
|
||||||
|
<source>1.8</source>
|
||||||
|
<target>1.8</target>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
</project>
|
||||||
|
|
@ -0,0 +1,92 @@
|
||||||
|
package algorithm;
|
||||||
|
|
||||||
|
import ca.pfv.spmf.algorithms.frequentpatterns.apriori.AlgoApriori;
|
||||||
|
import ca.pfv.spmf.patterns.itemset_array_integers_with_count.Itemset;
|
||||||
|
import ca.pfv.spmf.patterns.itemset_array_integers_with_count.Itemsets;
|
||||||
|
import main.DataExplorer;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
import java.time.format.DateTimeFormatter;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class Apriori {
|
||||||
|
|
||||||
|
public static void main(String[] args) throws InterruptedException {
|
||||||
|
System.out.println("Start Time: "+ LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_TIME));
|
||||||
|
String experimentProduct = "Apriori-ProductID";
|
||||||
|
String experimentAisle = "Apriori-AisleID";
|
||||||
|
|
||||||
|
runApriori(DataExplorer.transactionsetAisle, DataExplorer.aprioriPatterns, 300d/131209);
|
||||||
|
|
||||||
|
runExperimentApriori(DataExplorer.transactionsetProduct, experimentProduct);
|
||||||
|
runExperimentApriori(DataExplorer.transactionsetAisle, experimentAisle);
|
||||||
|
|
||||||
|
System.out.println("Apriori Ended.");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void runApriori(String transactionPath, String patternPath, double minsup){
|
||||||
|
|
||||||
|
AlgoApriori apriori = new AlgoApriori();
|
||||||
|
try {
|
||||||
|
apriori.runAlgorithm(minsup,
|
||||||
|
transactionPath,
|
||||||
|
patternPath);
|
||||||
|
apriori.printStats();
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void runExperimentApriori(String transactionPath, String experimentName){
|
||||||
|
List<Integer> listMinsup = new ArrayList<>();
|
||||||
|
int step = 1;
|
||||||
|
|
||||||
|
for(int minsup = 300; minsup < 1000; minsup += 50){
|
||||||
|
listMinsup.add(minsup);
|
||||||
|
}
|
||||||
|
|
||||||
|
Collections.shuffle(listMinsup);
|
||||||
|
|
||||||
|
try {
|
||||||
|
BufferedWriter writerTime = new BufferedWriter(new FileWriter(DataExplorer.workingDirectory + experimentName +"Time.perf"));
|
||||||
|
BufferedWriter writerCount = new BufferedWriter(new FileWriter(DataExplorer.workingDirectory + experimentName +"Count.perf"));
|
||||||
|
|
||||||
|
writerTime.write("minsup,time,algorithm");
|
||||||
|
writerTime.newLine();
|
||||||
|
|
||||||
|
writerCount.write("minsup,pattern_count,algorithm");
|
||||||
|
writerCount.newLine();
|
||||||
|
|
||||||
|
for(Integer minsup : listMinsup) {
|
||||||
|
AlgoApriori apriori = new AlgoApriori();
|
||||||
|
|
||||||
|
System.out.println("["+ LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_TIME) +"]Step "+ step +"/"+ listMinsup.size() +" - Starting with minsup: "+ minsup);
|
||||||
|
|
||||||
|
long start = System.currentTimeMillis();
|
||||||
|
Itemsets result = apriori.runAlgorithm(minsup/131209d, transactionPath, null);
|
||||||
|
long end = System.currentTimeMillis();
|
||||||
|
|
||||||
|
List<Itemset> lvl = new ArrayList<>();
|
||||||
|
for(List<Itemset> level : result.getLevels()){
|
||||||
|
lvl.addAll(level);
|
||||||
|
}
|
||||||
|
|
||||||
|
writerCount.write(minsup +","+ lvl.size() +","+ experimentName);
|
||||||
|
writerCount.newLine();
|
||||||
|
|
||||||
|
writerTime.write(minsup +","+ (end - start)/1000 +","+ experimentName);
|
||||||
|
writerTime.newLine();
|
||||||
|
|
||||||
|
step++;
|
||||||
|
}
|
||||||
|
|
||||||
|
writerTime.close();
|
||||||
|
writerCount.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,32 @@
|
||||||
|
package algorithm;
|
||||||
|
|
||||||
|
import ca.pfv.spmf.algorithms.sequentialpatterns.prefixspan.AlgoBIDEPlus;
|
||||||
|
import main.DataExplorer;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
import java.time.format.DateTimeFormatter;
|
||||||
|
|
||||||
|
public class BIDEPlus {
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
System.out.println("Start Time: "+ LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_TIME));
|
||||||
|
|
||||||
|
runBIDEPlus(DataExplorer.transactionsetSequence, DataExplorer.bideplusPatterns);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void runBIDEPlus(String sequenceDataset, String patternOutput){
|
||||||
|
try {
|
||||||
|
AlgoBIDEPlus bideplus = new AlgoBIDEPlus();
|
||||||
|
double minsup = 400d/19999;
|
||||||
|
|
||||||
|
bideplus.setShowSequenceIdentifiers(false);
|
||||||
|
bideplus.runAlgorithm(sequenceDataset, minsup, patternOutput);
|
||||||
|
|
||||||
|
bideplus.printStatistics();
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,97 @@
|
||||||
|
package algorithm;
|
||||||
|
|
||||||
|
import ca.pfv.spmf.algorithms.frequentpatterns.lcm.AlgoLCM;
|
||||||
|
import ca.pfv.spmf.algorithms.frequentpatterns.lcm.Dataset;
|
||||||
|
import ca.pfv.spmf.patterns.itemset_array_integers_with_count.Itemset;
|
||||||
|
import ca.pfv.spmf.patterns.itemset_array_integers_with_count.Itemsets;
|
||||||
|
import main.DataExplorer;
|
||||||
|
import org.jetbrains.annotations.Nullable;
|
||||||
|
|
||||||
|
import java.io.BufferedWriter;
|
||||||
|
import java.io.FileWriter;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
import java.time.format.DateTimeFormatter;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class LCM {
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
System.out.println("Start Time: "+ LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_TIME));
|
||||||
|
String exprimentProduct = "LCM-ProductID";
|
||||||
|
|
||||||
|
runLCM(DataExplorer.transactionsetProduct, DataExplorer.lcmPatterns, 300d/131209);
|
||||||
|
|
||||||
|
runExperimentLCM(DataExplorer.transactionsetProduct, exprimentProduct);
|
||||||
|
|
||||||
|
System.out.println("LCM Ended.");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void runLCM(String datasetPath, @Nullable String output, double minsup){
|
||||||
|
|
||||||
|
AlgoLCM lcm = new AlgoLCM();
|
||||||
|
try {
|
||||||
|
Dataset dataset = new Dataset(datasetPath);
|
||||||
|
lcm.runAlgorithm(minsup, dataset, output);
|
||||||
|
lcm.printStats();
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void runExperimentLCM(String transactionPath, String experimentName){
|
||||||
|
List<Integer> listMinsup = new ArrayList<>();
|
||||||
|
int step = 1;
|
||||||
|
|
||||||
|
for(int minsup = 50; minsup < 1000; minsup += 50){
|
||||||
|
listMinsup.add(minsup);
|
||||||
|
}
|
||||||
|
|
||||||
|
Collections.shuffle(listMinsup);
|
||||||
|
|
||||||
|
try {
|
||||||
|
BufferedWriter writerTime = new BufferedWriter(new FileWriter(DataExplorer.workingDirectory + "performanceLCMTime.csv"));
|
||||||
|
BufferedWriter writerCount = new BufferedWriter(new FileWriter(DataExplorer.workingDirectory + "performanceLCMCount.csv"));
|
||||||
|
|
||||||
|
writerTime.write("minsup,time,algorithm");
|
||||||
|
writerTime.newLine();
|
||||||
|
|
||||||
|
writerCount.write("minsup,pattern_count,algorithm");
|
||||||
|
writerCount.newLine();
|
||||||
|
|
||||||
|
for(Integer minsup : listMinsup) {
|
||||||
|
AlgoLCM lcm = new AlgoLCM();
|
||||||
|
// if true in next line it will find only closed itemsets, otherwise, all frequent itemsets
|
||||||
|
|
||||||
|
System.out.println("["+ LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_TIME) +"]Step "+ step +"/"+ listMinsup.size() +" - Starting with minsup: "+ minsup);
|
||||||
|
long start = System.currentTimeMillis();
|
||||||
|
Dataset dataset = new Dataset(transactionPath);
|
||||||
|
Itemsets itemsets = lcm.runAlgorithm(minsup/131209d, dataset, null);
|
||||||
|
long end = System.currentTimeMillis();
|
||||||
|
|
||||||
|
|
||||||
|
List<Itemset> lvl = new ArrayList<>();
|
||||||
|
for(List<Itemset> level : itemsets.getLevels()){
|
||||||
|
lvl.addAll(level);
|
||||||
|
}
|
||||||
|
|
||||||
|
writerCount.write(minsup +","+ lvl.size() +",LCM-ProductId");
|
||||||
|
writerCount.newLine();
|
||||||
|
|
||||||
|
writerTime.write(minsup +","+ (end - start)/1000 +","+ experimentName);
|
||||||
|
writerTime.newLine();
|
||||||
|
|
||||||
|
step++;
|
||||||
|
}
|
||||||
|
|
||||||
|
writerTime.close();
|
||||||
|
writerCount.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,616 @@
|
||||||
|
package main;
|
||||||
|
|
||||||
|
import com.opencsv.CSVReader;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class DataExplorer {
|
||||||
|
|
||||||
|
public static String workingDirectory = "/home/toshuumilia/tmp/testDMV/";
|
||||||
|
public static String rawdatasetProduct = workingDirectory + "order_products__train.csv";
|
||||||
|
public static String rawSequenceDataset = workingDirectory + "transactions_seq.txt";
|
||||||
|
|
||||||
|
public static String transactionsetProduct = workingDirectory + "trainProduct.transaction";
|
||||||
|
public static String transactionsetAisle = workingDirectory + "trainAisle.transaction";
|
||||||
|
public static String transactionsetSequence = workingDirectory + "customer.sequence";
|
||||||
|
|
||||||
|
public static String productInformation = workingDirectory + "products.csv";
|
||||||
|
public static String aisleInformation = workingDirectory + "aisles.csv";
|
||||||
|
public static String sequenceInformation = workingDirectory + "customer.seqinfo";
|
||||||
|
|
||||||
|
public static String lcmPatterns = workingDirectory + "pattern.lcm";
|
||||||
|
public static String aprioriPatterns = workingDirectory + "pattern.apriori";
|
||||||
|
public static String bideplusPatterns = workingDirectory + "pattern.bideplus";
|
||||||
|
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
Set<Integer> itemsItemsets = new HashSet<>();
|
||||||
|
Set<Integer> itemsSupport = new HashSet<>();
|
||||||
|
Set<Integer> itemsSequence = new HashSet<>();
|
||||||
|
Set<Integer> antecedents = new HashSet<>();
|
||||||
|
Set<Integer> consequents = new HashSet<>();
|
||||||
|
Set<Integer> aisles = new HashSet<>();
|
||||||
|
Set<Integer> itemExclusions = new HashSet<>();
|
||||||
|
|
||||||
|
// Create the dataset in the transaction format.
|
||||||
|
DatasetConverter.convertCSVIntoTransaction(rawdatasetProduct, transactionsetProduct);
|
||||||
|
DatasetConverter.sortTransaction(transactionsetProduct);
|
||||||
|
|
||||||
|
// Find the mamximum support in the product transaction set.
|
||||||
|
checkMaxSupport(transactionsetProduct);
|
||||||
|
|
||||||
|
// Replace the product id with its aisle id.
|
||||||
|
DatasetConverter.replaceIdByAisle(productInformation, rawdatasetProduct, transactionsetAisle);
|
||||||
|
DatasetConverter.sortTransaction(transactionsetAisle);
|
||||||
|
|
||||||
|
// Find the maximum support in the aisle transaction set
|
||||||
|
checkMaxSupport(transactionsetAisle);
|
||||||
|
|
||||||
|
// Tell how much items in the *product* transaction set has a support lower than 1% of the highest one.
|
||||||
|
separateItemOccurrence(workingDirectory +"itemOccurrence.csv", getItemOccurrence(transactionsetProduct), 187.27);
|
||||||
|
// Tell how much items in the *aisle* transaction set has a support lower than 1% of the highest one.
|
||||||
|
separateItemOccurrence(workingDirectory +"itemOccurrence.csv", getItemOccurrence(transactionsetAisle), 721.28);
|
||||||
|
|
||||||
|
|
||||||
|
// Find 100 itemests with the highest support and with at least 2 items in each itemsets.
|
||||||
|
findMaxSupportItemsets(lcmPatterns, 100, 1);
|
||||||
|
|
||||||
|
// Find the itemsets with the items I want.
|
||||||
|
itemsItemsets.addAll(Arrays.asList(24,83,120,123));
|
||||||
|
chooseItemset(lcmPatterns, itemsItemsets);
|
||||||
|
|
||||||
|
// Compute the support of an itemset
|
||||||
|
itemsSupport.addAll(Arrays.asList(24, 83));
|
||||||
|
computeSupport(itemsSupport, transactionsetAisle);
|
||||||
|
|
||||||
|
// Compute the confidence of an association rule
|
||||||
|
antecedents.addAll(Arrays.asList(24, 83, 120));
|
||||||
|
consequents.addAll(Arrays.asList(123));
|
||||||
|
computeConfidence(antecedents, consequents, transactionsetAisle);
|
||||||
|
|
||||||
|
// Get the name of some aisles
|
||||||
|
aisles.addAll(Arrays.asList(24, 83));
|
||||||
|
findNameAisle(aisles, aisleInformation);
|
||||||
|
|
||||||
|
// Create the sequence dataset
|
||||||
|
DatasetConverter.convertCSVIntoSequences(rawSequenceDataset, transactionsetSequence, sequenceInformation);
|
||||||
|
DatasetConverter.sortSequences(transactionsetSequence);
|
||||||
|
|
||||||
|
//Find the 10 most supported sequences containing at least 3 itemsets with at least one having 2 items, and exclude all sequences having the provided items.
|
||||||
|
itemExclusions.addAll(Arrays.asList(93, 474, 6, 66));
|
||||||
|
findMaxSupportSequence(bideplusPatterns, 10, 2, 2, itemExclusions);
|
||||||
|
|
||||||
|
itemsSequence.addAll(Arrays.asList(75, 251));
|
||||||
|
findNameProductSeq(itemsSequence, sequenceInformation);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Map<Integer, Integer> getItemOccurrence(String transactionPath){
|
||||||
|
Map<Integer, Integer> mapItemOccurrence = new HashMap<>();
|
||||||
|
|
||||||
|
if(transactionPath.contains(".transaction")){
|
||||||
|
try{
|
||||||
|
BufferedReader reader = new BufferedReader(new FileReader(transactionPath));
|
||||||
|
String line;
|
||||||
|
int numberTransaction = 0;
|
||||||
|
|
||||||
|
while ((line = reader.readLine()) != null){
|
||||||
|
String[] lineSplit = line.split(" ");
|
||||||
|
|
||||||
|
for(String split : lineSplit){
|
||||||
|
try {
|
||||||
|
Integer item = Integer.valueOf(split);
|
||||||
|
|
||||||
|
Integer numberOccurrence = mapItemOccurrence.getOrDefault(item, 0) + 1;
|
||||||
|
|
||||||
|
mapItemOccurrence.put(item, numberOccurrence);
|
||||||
|
} catch (NumberFormatException e){
|
||||||
|
System.err.println("NumberFormatException");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
numberTransaction++;
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return mapItemOccurrence;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void checkMaxSupport(String transactionPath){
|
||||||
|
if(transactionPath.contains(".transaction")){
|
||||||
|
try{
|
||||||
|
BufferedReader reader = new BufferedReader(new FileReader(transactionPath));
|
||||||
|
Map<Integer, Integer> mapItemOccurrence = new HashMap<>();
|
||||||
|
String line;
|
||||||
|
int numberTransaction = 0;
|
||||||
|
int maxSupport = 0;
|
||||||
|
int idMaxSupport = -1;
|
||||||
|
|
||||||
|
while ((line = reader.readLine()) != null){
|
||||||
|
String[] lineSplit = line.split(" ");
|
||||||
|
|
||||||
|
for(String split : lineSplit){
|
||||||
|
try {
|
||||||
|
Integer item = Integer.valueOf(split);
|
||||||
|
|
||||||
|
Integer numberOccurrence = mapItemOccurrence.getOrDefault(item, 0) + 1;
|
||||||
|
|
||||||
|
mapItemOccurrence.put(item, numberOccurrence);
|
||||||
|
if (maxSupport < numberOccurrence) {
|
||||||
|
maxSupport = numberOccurrence;
|
||||||
|
idMaxSupport = item;
|
||||||
|
}
|
||||||
|
} catch (NumberFormatException e){
|
||||||
|
System.err.println("NumberFormatException");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
numberTransaction++;
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.println("Number of Transactions: "+ numberTransaction);
|
||||||
|
System.out.println("Most present item: "+ idMaxSupport + " ("+ maxSupport +" items)");
|
||||||
|
System.out.println("Max relative support for Apriori: "+ ((double) maxSupport)/numberTransaction);
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw new RuntimeException();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void separateItemOccurrence(String output, Map<Integer, Integer> mapItemOccurrence, double threshold){
|
||||||
|
int[] category = new int[2];
|
||||||
|
double separator = 721.78;
|
||||||
|
|
||||||
|
for(Integer itemId : mapItemOccurrence.keySet()){
|
||||||
|
int index = mapItemOccurrence.get(itemId) < threshold ? 0 : 1;
|
||||||
|
category[index]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
try{
|
||||||
|
BufferedWriter writer = new BufferedWriter(new FileWriter(output));
|
||||||
|
|
||||||
|
writer.write("interval,occurrence");
|
||||||
|
writer.newLine();
|
||||||
|
|
||||||
|
writer.write("[0.00;"+ threshold +"[,"+ category[0]);
|
||||||
|
writer.newLine();
|
||||||
|
|
||||||
|
writer.write("["+ threshold +";"+ (int) (threshold*100) +"],"+ category[1]);
|
||||||
|
writer.newLine();
|
||||||
|
|
||||||
|
writer.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void chooseItemset(String patternPath, Set<Integer> itemsNeeded){
|
||||||
|
try{
|
||||||
|
BufferedReader reader = new BufferedReader(new FileReader(patternPath));
|
||||||
|
StringBuilder stringBuilder = new StringBuilder("");
|
||||||
|
String line;
|
||||||
|
|
||||||
|
while((line = reader.readLine()) != null){
|
||||||
|
String[] linePart = line.split(" #SUP: ");
|
||||||
|
String[] items = linePart[0].split(" ");
|
||||||
|
Set<Integer> recognizedItems = new HashSet<>();
|
||||||
|
|
||||||
|
for(String item : items){
|
||||||
|
Integer item_id = Integer.valueOf(item);
|
||||||
|
if(itemsNeeded.contains(item_id)){
|
||||||
|
recognizedItems.add(item_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(recognizedItems.size() == itemsNeeded.size()){
|
||||||
|
stringBuilder.append(line);
|
||||||
|
stringBuilder.append('\n');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.println(stringBuilder.toString());
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
} catch (IOException e){
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void computeSupport(Set<Integer> itemset, String dataset){
|
||||||
|
try{
|
||||||
|
BufferedReader reader = new BufferedReader(new FileReader(dataset));
|
||||||
|
String line;
|
||||||
|
int support = 0;
|
||||||
|
|
||||||
|
while ((line = reader.readLine()) != null){
|
||||||
|
String[] lineParts = line.split(" ");
|
||||||
|
Set<Integer> recognizedItems = new HashSet<>();
|
||||||
|
|
||||||
|
for(String item : lineParts){
|
||||||
|
Integer item_id = Integer.valueOf(item);
|
||||||
|
if(itemset.contains(item_id)){
|
||||||
|
recognizedItems.add(item_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(recognizedItems.size() == itemset.size()){
|
||||||
|
support++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.println(prettyprintItemset(itemset) + ": Supp="+ support);
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
} catch (IOException e){
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void computeConfidence(Set<Integer> antecedents, Set<Integer> consequents, String dataset){
|
||||||
|
Set<Integer> antUcon = new HashSet<>();
|
||||||
|
double antUconSupport = 0;
|
||||||
|
double antSupport = 0;
|
||||||
|
|
||||||
|
antUcon.addAll(antecedents);
|
||||||
|
antUcon.addAll(consequents);
|
||||||
|
|
||||||
|
try{
|
||||||
|
BufferedReader reader = new BufferedReader(new FileReader(dataset));
|
||||||
|
String line;
|
||||||
|
|
||||||
|
while((line = reader.readLine()) != null){
|
||||||
|
String[] items = line.split(" ");
|
||||||
|
Set<Integer> recognizedAntItems = new HashSet<>();
|
||||||
|
Set<Integer> recognizedAntUConItems = new HashSet<>();
|
||||||
|
|
||||||
|
for(String item : items){
|
||||||
|
Integer item_id = Integer.valueOf(item);
|
||||||
|
|
||||||
|
if(antecedents.contains(item_id)){
|
||||||
|
recognizedAntItems.add(item_id);
|
||||||
|
}
|
||||||
|
if(antUcon.contains(item_id)){
|
||||||
|
recognizedAntUConItems.add(item_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(recognizedAntItems.size() == antecedents.size()){
|
||||||
|
antSupport++;
|
||||||
|
}
|
||||||
|
if(recognizedAntUConItems.size() == antUcon.size()){
|
||||||
|
antUconSupport++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pretty print
|
||||||
|
StringBuilder stringBuilderAnt = new StringBuilder("");
|
||||||
|
for(Integer item_id : antecedents){
|
||||||
|
stringBuilderAnt.append(item_id);
|
||||||
|
stringBuilderAnt.append(',');
|
||||||
|
}
|
||||||
|
stringBuilderAnt.deleteCharAt(stringBuilderAnt.lastIndexOf(","));
|
||||||
|
|
||||||
|
// Pretty print
|
||||||
|
StringBuilder stringBuilderCon = new StringBuilder("");
|
||||||
|
for(Integer item_id : consequents){
|
||||||
|
stringBuilderCon.append(item_id);
|
||||||
|
stringBuilderCon.append(',');
|
||||||
|
}
|
||||||
|
stringBuilderCon.deleteCharAt(stringBuilderCon.lastIndexOf(","));
|
||||||
|
|
||||||
|
System.out.println("{"+ stringBuilderAnt +"} -> {"+ stringBuilderCon +"}: Conf="+ antUconSupport / antSupport);
|
||||||
|
System.out.println("Antecedents support: "+ antSupport);
|
||||||
|
System.out.println("Antecedents U Consequents support: "+ antUconSupport);
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
} catch (IOException e){
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void findMaxSupportItemsets(String patternPath, int n, int excludePatternSizeLessThan){
|
||||||
|
try{
|
||||||
|
BufferedReader reader = new BufferedReader(new FileReader(patternPath));
|
||||||
|
String line;
|
||||||
|
|
||||||
|
Map<String, Integer> mapISSupport = new HashMap<>();
|
||||||
|
|
||||||
|
while((line = reader.readLine()) != null){
|
||||||
|
String[] lineParts = line.split(" #SUP: ");
|
||||||
|
String[] items = lineParts[0].split(" ");
|
||||||
|
|
||||||
|
if(items.length > excludePatternSizeLessThan) {
|
||||||
|
Integer support = Integer.valueOf(lineParts[1]);
|
||||||
|
SortedSet<Integer> itemset = new TreeSet<>();
|
||||||
|
|
||||||
|
for (String item_id : items) {
|
||||||
|
itemset.add(Integer.valueOf(item_id));
|
||||||
|
}
|
||||||
|
|
||||||
|
mapISSupport.put(prettyprintItemset(itemset), support);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
reader.close();
|
||||||
|
|
||||||
|
System.out.println(mapISSupport.keySet().size() +" interesting patterns found.");
|
||||||
|
List<String> nMaxItemset = findMaxSupportMap(new HashMap<>(mapISSupport), n);
|
||||||
|
|
||||||
|
for(String itemsetStr : nMaxItemset) {
|
||||||
|
System.out.println(itemsetStr);
|
||||||
|
}
|
||||||
|
} catch (IOException e){
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void findMaxSupportSequence(String patternPath, int n, int excludeSequenceSizeLessThan, int sizeOneItemsetAtLeast, Set<Integer> excludeItems){
|
||||||
|
try{
|
||||||
|
BufferedReader reader = new BufferedReader(new FileReader(patternPath));
|
||||||
|
String line;
|
||||||
|
|
||||||
|
Map<String, Integer> mapSSupport = new HashMap<>();
|
||||||
|
|
||||||
|
while((line = reader.readLine()) != null){
|
||||||
|
String[] lineParts = line.split(" #SUP: ");
|
||||||
|
String[] itemsets = lineParts[0].split("-1");
|
||||||
|
|
||||||
|
if(itemsets.length > excludeSequenceSizeLessThan) {
|
||||||
|
Integer support = Integer.valueOf(lineParts[1]);
|
||||||
|
|
||||||
|
boolean itemExcluded = false;
|
||||||
|
boolean itemsetSizeExcluded = true;
|
||||||
|
|
||||||
|
for(String itemset : itemsets){
|
||||||
|
String[] items = itemset.split(" ");
|
||||||
|
int nbItems = 0;
|
||||||
|
|
||||||
|
for(String item : items){
|
||||||
|
if(!item.equals("")){
|
||||||
|
int itemID = Integer.valueOf(item);
|
||||||
|
|
||||||
|
for(Integer excludedItem : excludeItems) {
|
||||||
|
if (excludedItem.equals(itemID)){
|
||||||
|
itemExcluded = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nbItems++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(nbItems >= sizeOneItemsetAtLeast){
|
||||||
|
itemsetSizeExcluded = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!itemExcluded && !itemsetSizeExcluded) {
|
||||||
|
mapSSupport.put(prettyprintSequence(lineParts[0]), support);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
reader.close();
|
||||||
|
|
||||||
|
System.out.println(mapSSupport.keySet().size() +" interesting patterns found.");
|
||||||
|
|
||||||
|
List<String> nMaxItemset = findMaxSupportMap(new HashMap<>(mapSSupport), n);
|
||||||
|
|
||||||
|
for(String itemsetStr : nMaxItemset) {
|
||||||
|
System.out.println(itemsetStr);
|
||||||
|
}
|
||||||
|
} catch (IOException e){
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static List<String> findMaxSupportMap(Map<String, Integer> mapSupport, int n){
|
||||||
|
List<String> nMaxItemset = new ArrayList<>();
|
||||||
|
|
||||||
|
for(int i = 0; i < n; i++){
|
||||||
|
String itemsetMax = "";
|
||||||
|
Integer supportMax = -1;
|
||||||
|
|
||||||
|
for(String itemset : mapSupport.keySet()){
|
||||||
|
Integer support = mapSupport.get(itemset);
|
||||||
|
if(supportMax < support){
|
||||||
|
itemsetMax = itemset;
|
||||||
|
supportMax = support;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!itemsetMax.equals("")) {
|
||||||
|
nMaxItemset.add(itemsetMax + "-Supp=" + supportMax);
|
||||||
|
mapSupport.remove(itemsetMax);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nMaxItemset;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void findNameAisle(Set<Integer> aislesNeeded, String aisleInformations){
|
||||||
|
try {
|
||||||
|
StringBuilder stringBuilder = new StringBuilder("{*");
|
||||||
|
CSVReader csvReader = new CSVReader(new FileReader(aisleInformations));
|
||||||
|
String[] lineParts;
|
||||||
|
|
||||||
|
csvReader.readNext(); // Trash attributes name line
|
||||||
|
while((lineParts = csvReader.readNext()) != null){
|
||||||
|
if(aislesNeeded.contains(Integer.valueOf(lineParts[0]))){
|
||||||
|
stringBuilder.append(lineParts[1]);
|
||||||
|
stringBuilder.append(", ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stringBuilder.delete(stringBuilder.lastIndexOf(","), stringBuilder.lastIndexOf(",")+2);
|
||||||
|
stringBuilder.append("*}");
|
||||||
|
|
||||||
|
System.out.println(stringBuilder);
|
||||||
|
|
||||||
|
csvReader.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void findNameProductSeq(Set<Integer> productsNeeded, String sequenceInformationPath){
|
||||||
|
try{
|
||||||
|
BufferedReader reader = new BufferedReader(new FileReader(sequenceInformationPath));
|
||||||
|
String line;
|
||||||
|
|
||||||
|
while((line = reader.readLine()) != null){
|
||||||
|
String[] lineParts = line.split(",");
|
||||||
|
Integer itemID = Integer.valueOf(lineParts[1]);
|
||||||
|
|
||||||
|
// if((lineParts[0] +","+ lineParts[1]).contains(",93")){
|
||||||
|
// System.out.println((lineParts[0] +","+ lineParts[1]));
|
||||||
|
// }
|
||||||
|
|
||||||
|
if(productsNeeded.contains(itemID)){
|
||||||
|
System.out.println(lineParts[1] +"=>"+ lineParts[0]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
} catch (IOException e){
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String prettyprintItemset(Set<Integer> itemset){
|
||||||
|
StringBuilder stringBuilder = new StringBuilder("{");
|
||||||
|
|
||||||
|
for(Integer item_id : itemset){
|
||||||
|
stringBuilder.append(item_id);
|
||||||
|
stringBuilder.append(',');
|
||||||
|
}
|
||||||
|
stringBuilder.deleteCharAt(stringBuilder.lastIndexOf(","));
|
||||||
|
stringBuilder.append('}');
|
||||||
|
|
||||||
|
return stringBuilder.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String prettyprintSequence(String rawSequence){
|
||||||
|
String[] itemsets = rawSequence.split("-1");
|
||||||
|
List<Set<Integer>> sequence = new ArrayList<>();
|
||||||
|
|
||||||
|
for(String rawitemset : itemsets){
|
||||||
|
String[] items = rawitemset.split(" ");
|
||||||
|
Set<Integer> itemset = new HashSet<>();
|
||||||
|
|
||||||
|
for(String item : items){
|
||||||
|
if(!item.equals("")){
|
||||||
|
itemset.add(Integer.valueOf(item));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sequence.add(itemset);
|
||||||
|
}
|
||||||
|
|
||||||
|
return prettyprintSequence(sequence);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String prettyprintSequence(List<Set<Integer>> sequence){
|
||||||
|
StringBuilder stringBuilder = new StringBuilder("[");
|
||||||
|
|
||||||
|
for(Set<Integer> itemset : sequence){
|
||||||
|
stringBuilder.append(prettyprintItemset(itemset));
|
||||||
|
stringBuilder.append(" ");
|
||||||
|
}
|
||||||
|
stringBuilder.deleteCharAt(stringBuilder.lastIndexOf(" "));
|
||||||
|
|
||||||
|
stringBuilder.append("]");
|
||||||
|
return stringBuilder.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
// public static void countNumberTransactionWithMoreThanNItem(String transactionPath, int n){
|
||||||
|
// try{
|
||||||
|
// BufferedReader reader = new BufferedReader(new FileReader(transactionPath));
|
||||||
|
// String line;
|
||||||
|
// int sum = 0;
|
||||||
|
//
|
||||||
|
// while((line = reader.readLine()) != null){
|
||||||
|
// sum += line.split(" ").length >= n ? 1 : 0;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// System.out.println("Number of transactions with at least "+ n +" items: "+ sum);
|
||||||
|
//
|
||||||
|
// reader.close();
|
||||||
|
// } catch (IOException e){
|
||||||
|
// e.printStackTrace();
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// public static void countNumberItemInTransaction(File input, File output){
|
||||||
|
// if(input.getAbsolutePath().contains(".transaction")) {
|
||||||
|
// try {
|
||||||
|
// BufferedReader reader = new BufferedReader(new FileReader(input));
|
||||||
|
// BufferedWriter writer = new BufferedWriter(new FileWriter(output));
|
||||||
|
// String line;
|
||||||
|
//
|
||||||
|
// writer.write("length");
|
||||||
|
// writer.newLine();
|
||||||
|
//
|
||||||
|
// while ((line = reader.readLine()) != null) {
|
||||||
|
// String[] lineSplit = line.split(" ");
|
||||||
|
// writer.write(lineSplit.length +"");
|
||||||
|
// writer.newLine();
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// writer.close();
|
||||||
|
// reader.close();
|
||||||
|
// } catch (IOException e) {
|
||||||
|
// e.printStackTrace();
|
||||||
|
// }
|
||||||
|
// } else {
|
||||||
|
// throw new RuntimeException("");
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// public static void groupItemOccurrence(String output, Map<Integer, Integer> mapItemOccurrence){
|
||||||
|
// Map<String, Integer> mapIntervalCount = new HashMap<>();
|
||||||
|
// List<String> listInterval = new ArrayList<>();
|
||||||
|
//
|
||||||
|
// DecimalFormat nFormat = new DecimalFormat("#.##");
|
||||||
|
// DecimalFormatSymbols dfs = new DecimalFormatSymbols();
|
||||||
|
// dfs.setDecimalSeparator('.');
|
||||||
|
// nFormat.setDecimalFormatSymbols(dfs);
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// int nbInterval = 100;
|
||||||
|
// double max = 72178d +1;
|
||||||
|
//
|
||||||
|
// for(int i = 0; i < nbInterval; i++){
|
||||||
|
// String interval = "["+ nFormat.format((max/nbInterval)*i) +";"+ nFormat.format((max/nbInterval)*(i+1)) +"[";
|
||||||
|
// listInterval.add(interval);
|
||||||
|
// mapIntervalCount.put(interval, 0);
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// for(Integer itemId : mapItemOccurrence.keySet()){
|
||||||
|
// String interval = listInterval.get((int) (mapItemOccurrence.get(itemId)/(max/nbInterval)));
|
||||||
|
//
|
||||||
|
// mapIntervalCount.put(interval, mapIntervalCount.getOrDefault(interval, 0)+1);
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// try{
|
||||||
|
// BufferedWriter writer = new BufferedWriter(new FileWriter(output));
|
||||||
|
//
|
||||||
|
// writer.write("interval,occurrence");
|
||||||
|
// writer.newLine();
|
||||||
|
//
|
||||||
|
// for(String key : listInterval){
|
||||||
|
// if(!mapIntervalCount.get(key).equals(0)) {
|
||||||
|
// writer.write(key + "," + mapIntervalCount.get(key));
|
||||||
|
// writer.newLine();
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// writer.close();
|
||||||
|
// } catch (IOException e) {
|
||||||
|
// e.printStackTrace();
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,284 @@
|
||||||
|
package main;
|
||||||
|
|
||||||
|
import com.opencsv.CSVReader;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class DatasetConverter {
|
||||||
|
|
||||||
|
public static void convertCSVIntoTransaction(String input, String output){
|
||||||
|
try{
|
||||||
|
BufferedReader reader = new BufferedReader(new FileReader(input));
|
||||||
|
BufferedWriter writer = new BufferedWriter(new FileWriter(output));
|
||||||
|
|
||||||
|
String line;
|
||||||
|
int idOrder;
|
||||||
|
int idProduct;
|
||||||
|
|
||||||
|
|
||||||
|
int lastIdOrder = -1;
|
||||||
|
boolean firstTransaction = true;
|
||||||
|
|
||||||
|
// Delete first line with the header.
|
||||||
|
reader.readLine();
|
||||||
|
|
||||||
|
while ((line = reader.readLine()) != null){
|
||||||
|
String[] lines = line.split(",");
|
||||||
|
|
||||||
|
idOrder = Integer.valueOf(lines[0]);
|
||||||
|
idProduct = Integer.valueOf(lines[1]);
|
||||||
|
|
||||||
|
|
||||||
|
if(lastIdOrder != idOrder){
|
||||||
|
if(firstTransaction){
|
||||||
|
firstTransaction = false;
|
||||||
|
} else {
|
||||||
|
writer.write("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
lastIdOrder = idOrder;
|
||||||
|
}
|
||||||
|
|
||||||
|
writer.write(idProduct + " ");
|
||||||
|
}
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
writer.close();
|
||||||
|
} catch (IOException exception){
|
||||||
|
exception.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void replaceIdByAisle(String product, String dataset, String output){
|
||||||
|
Map<Integer, Integer> mapProductIdAisleId = new HashMap<>();
|
||||||
|
|
||||||
|
try{
|
||||||
|
Set<Integer> setAisleKept = new HashSet<>();
|
||||||
|
BufferedReader reader;// = new BufferedReader(new FileReader(product));
|
||||||
|
BufferedWriter writer;// = new BufferedWriter(new FileWriter(DataExplorer.workingDirectory + "replace.tmp"));
|
||||||
|
String line;
|
||||||
|
int idOrder;
|
||||||
|
int previousIdOrder = -1;
|
||||||
|
int idProduct;
|
||||||
|
boolean firstTransaction = true;
|
||||||
|
|
||||||
|
CSVReader csvreader = new CSVReader(new FileReader(product));
|
||||||
|
String[] nline;
|
||||||
|
|
||||||
|
csvreader.readNext();
|
||||||
|
while ((nline = csvreader.readNext()) != null) {
|
||||||
|
try {
|
||||||
|
mapProductIdAisleId.put(Integer.valueOf(nline[0]), Integer.valueOf(nline[2]));
|
||||||
|
} catch (NumberFormatException e){
|
||||||
|
|
||||||
|
for(String l : nline) {
|
||||||
|
System.out.print(l);
|
||||||
|
if(!l.equals(nline[nline.length-1])) {
|
||||||
|
System.out.print(",");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
System.out.println();
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
reader = new BufferedReader(new FileReader(dataset));
|
||||||
|
writer = new BufferedWriter(new FileWriter(output));
|
||||||
|
|
||||||
|
reader.readLine();
|
||||||
|
|
||||||
|
while ((line = reader.readLine()) != null){
|
||||||
|
String[] lines = line.split(",");
|
||||||
|
|
||||||
|
idOrder = Integer.valueOf(lines[0]);
|
||||||
|
idProduct = Integer.valueOf(lines[1]);
|
||||||
|
|
||||||
|
|
||||||
|
if(previousIdOrder != idOrder){
|
||||||
|
if(firstTransaction){
|
||||||
|
firstTransaction = false;
|
||||||
|
} else {
|
||||||
|
|
||||||
|
if(!setAisleKept.isEmpty()) {
|
||||||
|
for (Integer idAisle : setAisleKept) {
|
||||||
|
writer.write(idAisle.toString() + " ");
|
||||||
|
}
|
||||||
|
writer.write("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
setAisleKept.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
previousIdOrder = idOrder;
|
||||||
|
}
|
||||||
|
|
||||||
|
setAisleKept.add(mapProductIdAisleId.get(idProduct));
|
||||||
|
}
|
||||||
|
|
||||||
|
// For the last transaction.
|
||||||
|
if(!setAisleKept.isEmpty()) {
|
||||||
|
for (Integer idAisle : setAisleKept) {
|
||||||
|
writer.write(idAisle.toString() + " ");
|
||||||
|
}
|
||||||
|
writer.write("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
writer.close();
|
||||||
|
} catch (IOException e){
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void sortTransaction(String transactionPath){
|
||||||
|
try{
|
||||||
|
BufferedReader reader = new BufferedReader(new FileReader(transactionPath));
|
||||||
|
StringBuilder stringBuilder = new StringBuilder("");
|
||||||
|
String line;
|
||||||
|
|
||||||
|
while ((line = reader.readLine()) != null){
|
||||||
|
String[] linePart = line.split(" ");
|
||||||
|
SortedSet<Integer> sortedSet = new TreeSet<>();
|
||||||
|
|
||||||
|
for(String item : linePart){
|
||||||
|
sortedSet.add(Integer.valueOf(item));
|
||||||
|
}
|
||||||
|
|
||||||
|
for(Integer item_id : sortedSet){
|
||||||
|
stringBuilder.append(item_id);
|
||||||
|
stringBuilder.append(' ');
|
||||||
|
}
|
||||||
|
|
||||||
|
stringBuilder.append('\n');
|
||||||
|
}
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
BufferedWriter writer = new BufferedWriter(new FileWriter(transactionPath));
|
||||||
|
|
||||||
|
writer.write(stringBuilder.toString());
|
||||||
|
|
||||||
|
writer.close();
|
||||||
|
} catch (IOException e){
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void convertCSVIntoSequences(String rawSequenceDataset, String outputSequence, String outputInfo){
|
||||||
|
try{
|
||||||
|
BufferedReader reader = new BufferedReader(new FileReader(rawSequenceDataset));
|
||||||
|
BufferedWriter writerSequence = new BufferedWriter(new FileWriter(outputSequence));
|
||||||
|
BufferedWriter writerInfo = new BufferedWriter(new FileWriter(outputInfo));
|
||||||
|
|
||||||
|
String line;
|
||||||
|
StringBuilder sequenceBuilder = new StringBuilder("");
|
||||||
|
int lineNumber = 0;
|
||||||
|
|
||||||
|
Map<String, Integer> mapNameId = new HashMap<>();
|
||||||
|
int freeID = 1;
|
||||||
|
int lastCustomerID = -1;
|
||||||
|
int lastOrderNumber = -1;
|
||||||
|
boolean firstSequence = true;
|
||||||
|
|
||||||
|
while ((line = reader.readLine()) != null){
|
||||||
|
String[] lineParts = line.split("\t");
|
||||||
|
Integer customerID = Integer.valueOf(lineParts[0]);
|
||||||
|
Integer orderNumber = Integer.valueOf(lineParts[1]);
|
||||||
|
|
||||||
|
if(!customerID.equals(lastCustomerID)){
|
||||||
|
if(firstSequence){
|
||||||
|
firstSequence = false;
|
||||||
|
}else{
|
||||||
|
sequenceBuilder.append("-2");
|
||||||
|
writerSequence.write(sequenceBuilder.toString());
|
||||||
|
writerSequence.newLine();
|
||||||
|
|
||||||
|
sequenceBuilder = new StringBuilder("");
|
||||||
|
}
|
||||||
|
|
||||||
|
lastCustomerID = customerID;
|
||||||
|
} else if(orderNumber <= lastOrderNumber){
|
||||||
|
System.out.println("Line "+ lineNumber +" :c");
|
||||||
|
}
|
||||||
|
|
||||||
|
String[] items = lineParts[3].split(",");
|
||||||
|
for(String item : items){
|
||||||
|
if(item.equals("")){
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
Integer itemID = mapNameId.getOrDefault(item, -1);
|
||||||
|
|
||||||
|
if(itemID.equals(-1)){
|
||||||
|
mapNameId.put(item, freeID);
|
||||||
|
itemID = freeID;
|
||||||
|
freeID++;
|
||||||
|
}
|
||||||
|
|
||||||
|
sequenceBuilder.append(itemID);
|
||||||
|
sequenceBuilder.append(' ');
|
||||||
|
}
|
||||||
|
sequenceBuilder.append("-1 ");
|
||||||
|
|
||||||
|
lastOrderNumber = orderNumber;
|
||||||
|
lineNumber++;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(String item : new TreeSet<>(mapNameId.keySet())){
|
||||||
|
writerInfo.write(item);
|
||||||
|
writerInfo.write(',');
|
||||||
|
writerInfo.write(mapNameId.get(item).toString());
|
||||||
|
writerInfo.newLine();
|
||||||
|
}
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
writerSequence.close();
|
||||||
|
writerInfo.close();
|
||||||
|
} catch (IOException e){
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void sortSequences(String sequenceDataset){
|
||||||
|
try{
|
||||||
|
BufferedReader reader = new BufferedReader(new FileReader(sequenceDataset));
|
||||||
|
StringBuilder stringBuilder = new StringBuilder("");
|
||||||
|
String line;
|
||||||
|
|
||||||
|
while ((line = reader.readLine()) != null){
|
||||||
|
String[] linePart = line.split(" -1 ");
|
||||||
|
|
||||||
|
for(String itemset : linePart) {
|
||||||
|
if(!itemset.contains("-2")) {
|
||||||
|
SortedSet<Integer> sortedSet = new TreeSet<>();
|
||||||
|
String[] items = itemset.split(" ");
|
||||||
|
|
||||||
|
for (String item : items) {
|
||||||
|
sortedSet.add(Integer.valueOf(item));
|
||||||
|
}
|
||||||
|
|
||||||
|
for(Integer item_id : sortedSet){
|
||||||
|
stringBuilder.append(item_id);
|
||||||
|
stringBuilder.append(' ');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stringBuilder.append("-1 ");
|
||||||
|
}
|
||||||
|
|
||||||
|
stringBuilder.delete(stringBuilder.lastIndexOf("-1"), stringBuilder.lastIndexOf("-1") +3);
|
||||||
|
|
||||||
|
stringBuilder.append("-2\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
BufferedWriter writer = new BufferedWriter(new FileWriter(sequenceDataset));
|
||||||
|
|
||||||
|
writer.write(stringBuilder.toString());
|
||||||
|
|
||||||
|
writer.close();
|
||||||
|
} catch (IOException e){
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue