Initial commit
commit
910b138668
|
|
@ -0,0 +1,3 @@
|
|||
Subject/
|
||||
target/
|
||||
.idea/
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
|
||||
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
|
||||
<output url="file://$MODULE_DIR$/target/classes" />
|
||||
<output-test url="file://$MODULE_DIR$/target/test-classes" />
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/Subject" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/lib" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/target" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
<orderEntry type="library" name="SPMF" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.jetbrains:annotations:15.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.opencsv:opencsv:4.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.6" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.commons:commons-text:1.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: commons-beanutils:commons-beanutils:1.9.3" level="project" />
|
||||
<orderEntry type="library" name="Maven: commons-logging:commons-logging:1.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: commons-collections:commons-collections:3.2.2" level="project" />
|
||||
</component>
|
||||
</module>
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
# DMVHomework - Note to the teacher
|
||||
|
||||
Three data mining algorithms have been used for this homework: Apriori, LCM and BIDE+.
|
||||
Each of these three algorithms has its own class, named after the algorithm, in the package *algorithm*.
|
||||
Each class has a main method which is able to launch either one instance of the selected algorithm or an experiment on this algorithm.
|
||||
|
||||
To provide inputs to those algorithms, the class *main.DatasetConverter* takes the raw dataset and converts it into the appropriate format for Apriori and LCM (*.transaction*) and for BIDE+ (*.sequence*).
|
||||
For BIDE+, the raw dataset contains named items, so the formatted *.sequence* file contains only numeric IDs. A file with the same name and the extension *.seqinfo* gives the association between the name of each item and its ID.
|
||||
|
||||
To explore the patterns returned by those algorithms, several methods in the class *main.DataExplorer* help to choose meaningful patterns.
|
||||
Binary file not shown.
|
|
@ -0,0 +1,35 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>fr.urao.dmv</groupId>
|
||||
<artifactId>DMV Homework</artifactId>
|
||||
<version>1</version>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.jetbrains</groupId>
|
||||
<artifactId>annotations</artifactId>
|
||||
<version>RELEASE</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.opencsv</groupId>
|
||||
<artifactId>opencsv</artifactId>
|
||||
<version>4.0</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<configuration>
|
||||
<source>1.8</source>
|
||||
<target>1.8</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
package algorithm;
|
||||
|
||||
import ca.pfv.spmf.algorithms.frequentpatterns.apriori.AlgoApriori;
|
||||
import ca.pfv.spmf.patterns.itemset_array_integers_with_count.Itemset;
|
||||
import ca.pfv.spmf.patterns.itemset_array_integers_with_count.Itemsets;
|
||||
import main.DataExplorer;
|
||||
|
||||
import java.io.*;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
public class Apriori {
|
||||
|
||||
public static void main(String[] args) throws InterruptedException {
|
||||
System.out.println("Start Time: "+ LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_TIME));
|
||||
String experimentProduct = "Apriori-ProductID";
|
||||
String experimentAisle = "Apriori-AisleID";
|
||||
|
||||
runApriori(DataExplorer.transactionsetAisle, DataExplorer.aprioriPatterns, 300d/131209);
|
||||
|
||||
runExperimentApriori(DataExplorer.transactionsetProduct, experimentProduct);
|
||||
runExperimentApriori(DataExplorer.transactionsetAisle, experimentAisle);
|
||||
|
||||
System.out.println("Apriori Ended.");
|
||||
}
|
||||
|
||||
private static void runApriori(String transactionPath, String patternPath, double minsup){
|
||||
|
||||
AlgoApriori apriori = new AlgoApriori();
|
||||
try {
|
||||
apriori.runAlgorithm(minsup,
|
||||
transactionPath,
|
||||
patternPath);
|
||||
apriori.printStats();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
private static void runExperimentApriori(String transactionPath, String experimentName){
|
||||
List<Integer> listMinsup = new ArrayList<>();
|
||||
int step = 1;
|
||||
|
||||
for(int minsup = 300; minsup < 1000; minsup += 50){
|
||||
listMinsup.add(minsup);
|
||||
}
|
||||
|
||||
Collections.shuffle(listMinsup);
|
||||
|
||||
try {
|
||||
BufferedWriter writerTime = new BufferedWriter(new FileWriter(DataExplorer.workingDirectory + experimentName +"Time.perf"));
|
||||
BufferedWriter writerCount = new BufferedWriter(new FileWriter(DataExplorer.workingDirectory + experimentName +"Count.perf"));
|
||||
|
||||
writerTime.write("minsup,time,algorithm");
|
||||
writerTime.newLine();
|
||||
|
||||
writerCount.write("minsup,pattern_count,algorithm");
|
||||
writerCount.newLine();
|
||||
|
||||
for(Integer minsup : listMinsup) {
|
||||
AlgoApriori apriori = new AlgoApriori();
|
||||
|
||||
System.out.println("["+ LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_TIME) +"]Step "+ step +"/"+ listMinsup.size() +" - Starting with minsup: "+ minsup);
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
Itemsets result = apriori.runAlgorithm(minsup/131209d, transactionPath, null);
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
List<Itemset> lvl = new ArrayList<>();
|
||||
for(List<Itemset> level : result.getLevels()){
|
||||
lvl.addAll(level);
|
||||
}
|
||||
|
||||
writerCount.write(minsup +","+ lvl.size() +","+ experimentName);
|
||||
writerCount.newLine();
|
||||
|
||||
writerTime.write(minsup +","+ (end - start)/1000 +","+ experimentName);
|
||||
writerTime.newLine();
|
||||
|
||||
step++;
|
||||
}
|
||||
|
||||
writerTime.close();
|
||||
writerCount.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
package algorithm;
|
||||
|
||||
import ca.pfv.spmf.algorithms.sequentialpatterns.prefixspan.AlgoBIDEPlus;
|
||||
import main.DataExplorer;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
|
||||
public class BIDEPlus {
|
||||
|
||||
public static void main(String[] args) {
|
||||
System.out.println("Start Time: "+ LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_TIME));
|
||||
|
||||
runBIDEPlus(DataExplorer.transactionsetSequence, DataExplorer.bideplusPatterns);
|
||||
}
|
||||
|
||||
private static void runBIDEPlus(String sequenceDataset, String patternOutput){
|
||||
try {
|
||||
AlgoBIDEPlus bideplus = new AlgoBIDEPlus();
|
||||
double minsup = 400d/19999;
|
||||
|
||||
bideplus.setShowSequenceIdentifiers(false);
|
||||
bideplus.runAlgorithm(sequenceDataset, minsup, patternOutput);
|
||||
|
||||
bideplus.printStatistics();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,97 @@
|
|||
package algorithm;
|
||||
|
||||
import ca.pfv.spmf.algorithms.frequentpatterns.lcm.AlgoLCM;
|
||||
import ca.pfv.spmf.algorithms.frequentpatterns.lcm.Dataset;
|
||||
import ca.pfv.spmf.patterns.itemset_array_integers_with_count.Itemset;
|
||||
import ca.pfv.spmf.patterns.itemset_array_integers_with_count.Itemsets;
|
||||
import main.DataExplorer;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
public class LCM {
|
||||
|
||||
public static void main(String[] args) {
|
||||
System.out.println("Start Time: "+ LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_TIME));
|
||||
String exprimentProduct = "LCM-ProductID";
|
||||
|
||||
runLCM(DataExplorer.transactionsetProduct, DataExplorer.lcmPatterns, 300d/131209);
|
||||
|
||||
runExperimentLCM(DataExplorer.transactionsetProduct, exprimentProduct);
|
||||
|
||||
System.out.println("LCM Ended.");
|
||||
}
|
||||
|
||||
private static void runLCM(String datasetPath, @Nullable String output, double minsup){
|
||||
|
||||
AlgoLCM lcm = new AlgoLCM();
|
||||
try {
|
||||
Dataset dataset = new Dataset(datasetPath);
|
||||
lcm.runAlgorithm(minsup, dataset, output);
|
||||
lcm.printStats();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static void runExperimentLCM(String transactionPath, String experimentName){
|
||||
List<Integer> listMinsup = new ArrayList<>();
|
||||
int step = 1;
|
||||
|
||||
for(int minsup = 50; minsup < 1000; minsup += 50){
|
||||
listMinsup.add(minsup);
|
||||
}
|
||||
|
||||
Collections.shuffle(listMinsup);
|
||||
|
||||
try {
|
||||
BufferedWriter writerTime = new BufferedWriter(new FileWriter(DataExplorer.workingDirectory + "performanceLCMTime.csv"));
|
||||
BufferedWriter writerCount = new BufferedWriter(new FileWriter(DataExplorer.workingDirectory + "performanceLCMCount.csv"));
|
||||
|
||||
writerTime.write("minsup,time,algorithm");
|
||||
writerTime.newLine();
|
||||
|
||||
writerCount.write("minsup,pattern_count,algorithm");
|
||||
writerCount.newLine();
|
||||
|
||||
for(Integer minsup : listMinsup) {
|
||||
AlgoLCM lcm = new AlgoLCM();
|
||||
// if true in next line it will find only closed itemsets, otherwise, all frequent itemsets
|
||||
|
||||
System.out.println("["+ LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_TIME) +"]Step "+ step +"/"+ listMinsup.size() +" - Starting with minsup: "+ minsup);
|
||||
long start = System.currentTimeMillis();
|
||||
Dataset dataset = new Dataset(transactionPath);
|
||||
Itemsets itemsets = lcm.runAlgorithm(minsup/131209d, dataset, null);
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
|
||||
List<Itemset> lvl = new ArrayList<>();
|
||||
for(List<Itemset> level : itemsets.getLevels()){
|
||||
lvl.addAll(level);
|
||||
}
|
||||
|
||||
writerCount.write(minsup +","+ lvl.size() +",LCM-ProductId");
|
||||
writerCount.newLine();
|
||||
|
||||
writerTime.write(minsup +","+ (end - start)/1000 +","+ experimentName);
|
||||
writerTime.newLine();
|
||||
|
||||
step++;
|
||||
}
|
||||
|
||||
writerTime.close();
|
||||
writerCount.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,616 @@
|
|||
package main;
|
||||
|
||||
import com.opencsv.CSVReader;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
public class DataExplorer {
|
||||
|
||||
public static String workingDirectory = "/home/toshuumilia/tmp/testDMV/";
|
||||
public static String rawdatasetProduct = workingDirectory + "order_products__train.csv";
|
||||
public static String rawSequenceDataset = workingDirectory + "transactions_seq.txt";
|
||||
|
||||
public static String transactionsetProduct = workingDirectory + "trainProduct.transaction";
|
||||
public static String transactionsetAisle = workingDirectory + "trainAisle.transaction";
|
||||
public static String transactionsetSequence = workingDirectory + "customer.sequence";
|
||||
|
||||
public static String productInformation = workingDirectory + "products.csv";
|
||||
public static String aisleInformation = workingDirectory + "aisles.csv";
|
||||
public static String sequenceInformation = workingDirectory + "customer.seqinfo";
|
||||
|
||||
public static String lcmPatterns = workingDirectory + "pattern.lcm";
|
||||
public static String aprioriPatterns = workingDirectory + "pattern.apriori";
|
||||
public static String bideplusPatterns = workingDirectory + "pattern.bideplus";
|
||||
|
||||
|
||||
public static void main(String[] args) {
|
||||
Set<Integer> itemsItemsets = new HashSet<>();
|
||||
Set<Integer> itemsSupport = new HashSet<>();
|
||||
Set<Integer> itemsSequence = new HashSet<>();
|
||||
Set<Integer> antecedents = new HashSet<>();
|
||||
Set<Integer> consequents = new HashSet<>();
|
||||
Set<Integer> aisles = new HashSet<>();
|
||||
Set<Integer> itemExclusions = new HashSet<>();
|
||||
|
||||
// Create the dataset in the transaction format.
|
||||
DatasetConverter.convertCSVIntoTransaction(rawdatasetProduct, transactionsetProduct);
|
||||
DatasetConverter.sortTransaction(transactionsetProduct);
|
||||
|
||||
// Find the mamximum support in the product transaction set.
|
||||
checkMaxSupport(transactionsetProduct);
|
||||
|
||||
// Replace the product id with its aisle id.
|
||||
DatasetConverter.replaceIdByAisle(productInformation, rawdatasetProduct, transactionsetAisle);
|
||||
DatasetConverter.sortTransaction(transactionsetAisle);
|
||||
|
||||
// Find the maximum support in the aisle transaction set
|
||||
checkMaxSupport(transactionsetAisle);
|
||||
|
||||
// Tell how much items in the *product* transaction set has a support lower than 1% of the highest one.
|
||||
separateItemOccurrence(workingDirectory +"itemOccurrence.csv", getItemOccurrence(transactionsetProduct), 187.27);
|
||||
// Tell how much items in the *aisle* transaction set has a support lower than 1% of the highest one.
|
||||
separateItemOccurrence(workingDirectory +"itemOccurrence.csv", getItemOccurrence(transactionsetAisle), 721.28);
|
||||
|
||||
|
||||
// Find 100 itemests with the highest support and with at least 2 items in each itemsets.
|
||||
findMaxSupportItemsets(lcmPatterns, 100, 1);
|
||||
|
||||
// Find the itemsets with the items I want.
|
||||
itemsItemsets.addAll(Arrays.asList(24,83,120,123));
|
||||
chooseItemset(lcmPatterns, itemsItemsets);
|
||||
|
||||
// Compute the support of an itemset
|
||||
itemsSupport.addAll(Arrays.asList(24, 83));
|
||||
computeSupport(itemsSupport, transactionsetAisle);
|
||||
|
||||
// Compute the confidence of an association rule
|
||||
antecedents.addAll(Arrays.asList(24, 83, 120));
|
||||
consequents.addAll(Arrays.asList(123));
|
||||
computeConfidence(antecedents, consequents, transactionsetAisle);
|
||||
|
||||
// Get the name of some aisles
|
||||
aisles.addAll(Arrays.asList(24, 83));
|
||||
findNameAisle(aisles, aisleInformation);
|
||||
|
||||
// Create the sequence dataset
|
||||
DatasetConverter.convertCSVIntoSequences(rawSequenceDataset, transactionsetSequence, sequenceInformation);
|
||||
DatasetConverter.sortSequences(transactionsetSequence);
|
||||
|
||||
//Find the 10 most supported sequences containing at least 3 itemsets with at least one having 2 items, and exclude all sequences having the provided items.
|
||||
itemExclusions.addAll(Arrays.asList(93, 474, 6, 66));
|
||||
findMaxSupportSequence(bideplusPatterns, 10, 2, 2, itemExclusions);
|
||||
|
||||
itemsSequence.addAll(Arrays.asList(75, 251));
|
||||
findNameProductSeq(itemsSequence, sequenceInformation);
|
||||
}
|
||||
|
||||
public static Map<Integer, Integer> getItemOccurrence(String transactionPath){
|
||||
Map<Integer, Integer> mapItemOccurrence = new HashMap<>();
|
||||
|
||||
if(transactionPath.contains(".transaction")){
|
||||
try{
|
||||
BufferedReader reader = new BufferedReader(new FileReader(transactionPath));
|
||||
String line;
|
||||
int numberTransaction = 0;
|
||||
|
||||
while ((line = reader.readLine()) != null){
|
||||
String[] lineSplit = line.split(" ");
|
||||
|
||||
for(String split : lineSplit){
|
||||
try {
|
||||
Integer item = Integer.valueOf(split);
|
||||
|
||||
Integer numberOccurrence = mapItemOccurrence.getOrDefault(item, 0) + 1;
|
||||
|
||||
mapItemOccurrence.put(item, numberOccurrence);
|
||||
} catch (NumberFormatException e){
|
||||
System.err.println("NumberFormatException");
|
||||
}
|
||||
}
|
||||
|
||||
numberTransaction++;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
return mapItemOccurrence;
|
||||
}
|
||||
|
||||
public static void checkMaxSupport(String transactionPath){
|
||||
if(transactionPath.contains(".transaction")){
|
||||
try{
|
||||
BufferedReader reader = new BufferedReader(new FileReader(transactionPath));
|
||||
Map<Integer, Integer> mapItemOccurrence = new HashMap<>();
|
||||
String line;
|
||||
int numberTransaction = 0;
|
||||
int maxSupport = 0;
|
||||
int idMaxSupport = -1;
|
||||
|
||||
while ((line = reader.readLine()) != null){
|
||||
String[] lineSplit = line.split(" ");
|
||||
|
||||
for(String split : lineSplit){
|
||||
try {
|
||||
Integer item = Integer.valueOf(split);
|
||||
|
||||
Integer numberOccurrence = mapItemOccurrence.getOrDefault(item, 0) + 1;
|
||||
|
||||
mapItemOccurrence.put(item, numberOccurrence);
|
||||
if (maxSupport < numberOccurrence) {
|
||||
maxSupport = numberOccurrence;
|
||||
idMaxSupport = item;
|
||||
}
|
||||
} catch (NumberFormatException e){
|
||||
System.err.println("NumberFormatException");
|
||||
}
|
||||
}
|
||||
|
||||
numberTransaction++;
|
||||
}
|
||||
|
||||
System.out.println("Number of Transactions: "+ numberTransaction);
|
||||
System.out.println("Most present item: "+ idMaxSupport + " ("+ maxSupport +" items)");
|
||||
System.out.println("Max relative support for Apriori: "+ ((double) maxSupport)/numberTransaction);
|
||||
|
||||
reader.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
} else {
|
||||
throw new RuntimeException();
|
||||
}
|
||||
}
|
||||
|
||||
public static void separateItemOccurrence(String output, Map<Integer, Integer> mapItemOccurrence, double threshold){
|
||||
int[] category = new int[2];
|
||||
double separator = 721.78;
|
||||
|
||||
for(Integer itemId : mapItemOccurrence.keySet()){
|
||||
int index = mapItemOccurrence.get(itemId) < threshold ? 0 : 1;
|
||||
category[index]++;
|
||||
}
|
||||
|
||||
try{
|
||||
BufferedWriter writer = new BufferedWriter(new FileWriter(output));
|
||||
|
||||
writer.write("interval,occurrence");
|
||||
writer.newLine();
|
||||
|
||||
writer.write("[0.00;"+ threshold +"[,"+ category[0]);
|
||||
writer.newLine();
|
||||
|
||||
writer.write("["+ threshold +";"+ (int) (threshold*100) +"],"+ category[1]);
|
||||
writer.newLine();
|
||||
|
||||
writer.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public static void chooseItemset(String patternPath, Set<Integer> itemsNeeded){
|
||||
try{
|
||||
BufferedReader reader = new BufferedReader(new FileReader(patternPath));
|
||||
StringBuilder stringBuilder = new StringBuilder("");
|
||||
String line;
|
||||
|
||||
while((line = reader.readLine()) != null){
|
||||
String[] linePart = line.split(" #SUP: ");
|
||||
String[] items = linePart[0].split(" ");
|
||||
Set<Integer> recognizedItems = new HashSet<>();
|
||||
|
||||
for(String item : items){
|
||||
Integer item_id = Integer.valueOf(item);
|
||||
if(itemsNeeded.contains(item_id)){
|
||||
recognizedItems.add(item_id);
|
||||
}
|
||||
}
|
||||
|
||||
if(recognizedItems.size() == itemsNeeded.size()){
|
||||
stringBuilder.append(line);
|
||||
stringBuilder.append('\n');
|
||||
}
|
||||
}
|
||||
|
||||
System.out.println(stringBuilder.toString());
|
||||
|
||||
reader.close();
|
||||
} catch (IOException e){
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public static void computeSupport(Set<Integer> itemset, String dataset){
|
||||
try{
|
||||
BufferedReader reader = new BufferedReader(new FileReader(dataset));
|
||||
String line;
|
||||
int support = 0;
|
||||
|
||||
while ((line = reader.readLine()) != null){
|
||||
String[] lineParts = line.split(" ");
|
||||
Set<Integer> recognizedItems = new HashSet<>();
|
||||
|
||||
for(String item : lineParts){
|
||||
Integer item_id = Integer.valueOf(item);
|
||||
if(itemset.contains(item_id)){
|
||||
recognizedItems.add(item_id);
|
||||
}
|
||||
}
|
||||
|
||||
if(recognizedItems.size() == itemset.size()){
|
||||
support++;
|
||||
}
|
||||
}
|
||||
|
||||
System.out.println(prettyprintItemset(itemset) + ": Supp="+ support);
|
||||
|
||||
reader.close();
|
||||
} catch (IOException e){
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public static void computeConfidence(Set<Integer> antecedents, Set<Integer> consequents, String dataset){
|
||||
Set<Integer> antUcon = new HashSet<>();
|
||||
double antUconSupport = 0;
|
||||
double antSupport = 0;
|
||||
|
||||
antUcon.addAll(antecedents);
|
||||
antUcon.addAll(consequents);
|
||||
|
||||
try{
|
||||
BufferedReader reader = new BufferedReader(new FileReader(dataset));
|
||||
String line;
|
||||
|
||||
while((line = reader.readLine()) != null){
|
||||
String[] items = line.split(" ");
|
||||
Set<Integer> recognizedAntItems = new HashSet<>();
|
||||
Set<Integer> recognizedAntUConItems = new HashSet<>();
|
||||
|
||||
for(String item : items){
|
||||
Integer item_id = Integer.valueOf(item);
|
||||
|
||||
if(antecedents.contains(item_id)){
|
||||
recognizedAntItems.add(item_id);
|
||||
}
|
||||
if(antUcon.contains(item_id)){
|
||||
recognizedAntUConItems.add(item_id);
|
||||
}
|
||||
}
|
||||
|
||||
if(recognizedAntItems.size() == antecedents.size()){
|
||||
antSupport++;
|
||||
}
|
||||
if(recognizedAntUConItems.size() == antUcon.size()){
|
||||
antUconSupport++;
|
||||
}
|
||||
}
|
||||
|
||||
// Pretty print
|
||||
StringBuilder stringBuilderAnt = new StringBuilder("");
|
||||
for(Integer item_id : antecedents){
|
||||
stringBuilderAnt.append(item_id);
|
||||
stringBuilderAnt.append(',');
|
||||
}
|
||||
stringBuilderAnt.deleteCharAt(stringBuilderAnt.lastIndexOf(","));
|
||||
|
||||
// Pretty print
|
||||
StringBuilder stringBuilderCon = new StringBuilder("");
|
||||
for(Integer item_id : consequents){
|
||||
stringBuilderCon.append(item_id);
|
||||
stringBuilderCon.append(',');
|
||||
}
|
||||
stringBuilderCon.deleteCharAt(stringBuilderCon.lastIndexOf(","));
|
||||
|
||||
System.out.println("{"+ stringBuilderAnt +"} -> {"+ stringBuilderCon +"}: Conf="+ antUconSupport / antSupport);
|
||||
System.out.println("Antecedents support: "+ antSupport);
|
||||
System.out.println("Antecedents U Consequents support: "+ antUconSupport);
|
||||
|
||||
reader.close();
|
||||
} catch (IOException e){
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public static void findMaxSupportItemsets(String patternPath, int n, int excludePatternSizeLessThan){
|
||||
try{
|
||||
BufferedReader reader = new BufferedReader(new FileReader(patternPath));
|
||||
String line;
|
||||
|
||||
Map<String, Integer> mapISSupport = new HashMap<>();
|
||||
|
||||
while((line = reader.readLine()) != null){
|
||||
String[] lineParts = line.split(" #SUP: ");
|
||||
String[] items = lineParts[0].split(" ");
|
||||
|
||||
if(items.length > excludePatternSizeLessThan) {
|
||||
Integer support = Integer.valueOf(lineParts[1]);
|
||||
SortedSet<Integer> itemset = new TreeSet<>();
|
||||
|
||||
for (String item_id : items) {
|
||||
itemset.add(Integer.valueOf(item_id));
|
||||
}
|
||||
|
||||
mapISSupport.put(prettyprintItemset(itemset), support);
|
||||
}
|
||||
}
|
||||
reader.close();
|
||||
|
||||
System.out.println(mapISSupport.keySet().size() +" interesting patterns found.");
|
||||
List<String> nMaxItemset = findMaxSupportMap(new HashMap<>(mapISSupport), n);
|
||||
|
||||
for(String itemsetStr : nMaxItemset) {
|
||||
System.out.println(itemsetStr);
|
||||
}
|
||||
} catch (IOException e){
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public static void findMaxSupportSequence(String patternPath, int n, int excludeSequenceSizeLessThan, int sizeOneItemsetAtLeast, Set<Integer> excludeItems){
|
||||
try{
|
||||
BufferedReader reader = new BufferedReader(new FileReader(patternPath));
|
||||
String line;
|
||||
|
||||
Map<String, Integer> mapSSupport = new HashMap<>();
|
||||
|
||||
while((line = reader.readLine()) != null){
|
||||
String[] lineParts = line.split(" #SUP: ");
|
||||
String[] itemsets = lineParts[0].split("-1");
|
||||
|
||||
if(itemsets.length > excludeSequenceSizeLessThan) {
|
||||
Integer support = Integer.valueOf(lineParts[1]);
|
||||
|
||||
boolean itemExcluded = false;
|
||||
boolean itemsetSizeExcluded = true;
|
||||
|
||||
for(String itemset : itemsets){
|
||||
String[] items = itemset.split(" ");
|
||||
int nbItems = 0;
|
||||
|
||||
for(String item : items){
|
||||
if(!item.equals("")){
|
||||
int itemID = Integer.valueOf(item);
|
||||
|
||||
for(Integer excludedItem : excludeItems) {
|
||||
if (excludedItem.equals(itemID)){
|
||||
itemExcluded = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
nbItems++;
|
||||
}
|
||||
}
|
||||
|
||||
if(nbItems >= sizeOneItemsetAtLeast){
|
||||
itemsetSizeExcluded = false;
|
||||
}
|
||||
}
|
||||
|
||||
if(!itemExcluded && !itemsetSizeExcluded) {
|
||||
mapSSupport.put(prettyprintSequence(lineParts[0]), support);
|
||||
}
|
||||
}
|
||||
}
|
||||
reader.close();
|
||||
|
||||
System.out.println(mapSSupport.keySet().size() +" interesting patterns found.");
|
||||
|
||||
List<String> nMaxItemset = findMaxSupportMap(new HashMap<>(mapSSupport), n);
|
||||
|
||||
for(String itemsetStr : nMaxItemset) {
|
||||
System.out.println(itemsetStr);
|
||||
}
|
||||
} catch (IOException e){
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public static List<String> findMaxSupportMap(Map<String, Integer> mapSupport, int n){
|
||||
List<String> nMaxItemset = new ArrayList<>();
|
||||
|
||||
for(int i = 0; i < n; i++){
|
||||
String itemsetMax = "";
|
||||
Integer supportMax = -1;
|
||||
|
||||
for(String itemset : mapSupport.keySet()){
|
||||
Integer support = mapSupport.get(itemset);
|
||||
if(supportMax < support){
|
||||
itemsetMax = itemset;
|
||||
supportMax = support;
|
||||
}
|
||||
}
|
||||
|
||||
if(!itemsetMax.equals("")) {
|
||||
nMaxItemset.add(itemsetMax + "-Supp=" + supportMax);
|
||||
mapSupport.remove(itemsetMax);
|
||||
}
|
||||
}
|
||||
|
||||
return nMaxItemset;
|
||||
}
|
||||
|
||||
public static void findNameAisle(Set<Integer> aislesNeeded, String aisleInformations){
|
||||
try {
|
||||
StringBuilder stringBuilder = new StringBuilder("{*");
|
||||
CSVReader csvReader = new CSVReader(new FileReader(aisleInformations));
|
||||
String[] lineParts;
|
||||
|
||||
csvReader.readNext(); // Trash attributes name line
|
||||
while((lineParts = csvReader.readNext()) != null){
|
||||
if(aislesNeeded.contains(Integer.valueOf(lineParts[0]))){
|
||||
stringBuilder.append(lineParts[1]);
|
||||
stringBuilder.append(", ");
|
||||
}
|
||||
}
|
||||
stringBuilder.delete(stringBuilder.lastIndexOf(","), stringBuilder.lastIndexOf(",")+2);
|
||||
stringBuilder.append("*}");
|
||||
|
||||
System.out.println(stringBuilder);
|
||||
|
||||
csvReader.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public static void findNameProductSeq(Set<Integer> productsNeeded, String sequenceInformationPath){
|
||||
try{
|
||||
BufferedReader reader = new BufferedReader(new FileReader(sequenceInformationPath));
|
||||
String line;
|
||||
|
||||
while((line = reader.readLine()) != null){
|
||||
String[] lineParts = line.split(",");
|
||||
Integer itemID = Integer.valueOf(lineParts[1]);
|
||||
|
||||
// if((lineParts[0] +","+ lineParts[1]).contains(",93")){
|
||||
// System.out.println((lineParts[0] +","+ lineParts[1]));
|
||||
// }
|
||||
|
||||
if(productsNeeded.contains(itemID)){
|
||||
System.out.println(lineParts[1] +"=>"+ lineParts[0]);
|
||||
}
|
||||
}
|
||||
|
||||
reader.close();
|
||||
} catch (IOException e){
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
private static String prettyprintItemset(Set<Integer> itemset){
|
||||
StringBuilder stringBuilder = new StringBuilder("{");
|
||||
|
||||
for(Integer item_id : itemset){
|
||||
stringBuilder.append(item_id);
|
||||
stringBuilder.append(',');
|
||||
}
|
||||
stringBuilder.deleteCharAt(stringBuilder.lastIndexOf(","));
|
||||
stringBuilder.append('}');
|
||||
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
private static String prettyprintSequence(String rawSequence){
|
||||
String[] itemsets = rawSequence.split("-1");
|
||||
List<Set<Integer>> sequence = new ArrayList<>();
|
||||
|
||||
for(String rawitemset : itemsets){
|
||||
String[] items = rawitemset.split(" ");
|
||||
Set<Integer> itemset = new HashSet<>();
|
||||
|
||||
for(String item : items){
|
||||
if(!item.equals("")){
|
||||
itemset.add(Integer.valueOf(item));
|
||||
}
|
||||
}
|
||||
|
||||
sequence.add(itemset);
|
||||
}
|
||||
|
||||
return prettyprintSequence(sequence);
|
||||
}
|
||||
|
||||
private static String prettyprintSequence(List<Set<Integer>> sequence){
|
||||
StringBuilder stringBuilder = new StringBuilder("[");
|
||||
|
||||
for(Set<Integer> itemset : sequence){
|
||||
stringBuilder.append(prettyprintItemset(itemset));
|
||||
stringBuilder.append(" ");
|
||||
}
|
||||
stringBuilder.deleteCharAt(stringBuilder.lastIndexOf(" "));
|
||||
|
||||
stringBuilder.append("]");
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
// public static void countNumberTransactionWithMoreThanNItem(String transactionPath, int n){
|
||||
// try{
|
||||
// BufferedReader reader = new BufferedReader(new FileReader(transactionPath));
|
||||
// String line;
|
||||
// int sum = 0;
|
||||
//
|
||||
// while((line = reader.readLine()) != null){
|
||||
// sum += line.split(" ").length >= n ? 1 : 0;
|
||||
// }
|
||||
//
|
||||
// System.out.println("Number of transactions with at least "+ n +" items: "+ sum);
|
||||
//
|
||||
// reader.close();
|
||||
// } catch (IOException e){
|
||||
// e.printStackTrace();
|
||||
// }
|
||||
// }
|
||||
// public static void countNumberItemInTransaction(File input, File output){
|
||||
// if(input.getAbsolutePath().contains(".transaction")) {
|
||||
// try {
|
||||
// BufferedReader reader = new BufferedReader(new FileReader(input));
|
||||
// BufferedWriter writer = new BufferedWriter(new FileWriter(output));
|
||||
// String line;
|
||||
//
|
||||
// writer.write("length");
|
||||
// writer.newLine();
|
||||
//
|
||||
// while ((line = reader.readLine()) != null) {
|
||||
// String[] lineSplit = line.split(" ");
|
||||
// writer.write(lineSplit.length +"");
|
||||
// writer.newLine();
|
||||
// }
|
||||
//
|
||||
// writer.close();
|
||||
// reader.close();
|
||||
// } catch (IOException e) {
|
||||
// e.printStackTrace();
|
||||
// }
|
||||
// } else {
|
||||
// throw new RuntimeException("");
|
||||
// }
|
||||
// }
|
||||
|
||||
// public static void groupItemOccurrence(String output, Map<Integer, Integer> mapItemOccurrence){
|
||||
// Map<String, Integer> mapIntervalCount = new HashMap<>();
|
||||
// List<String> listInterval = new ArrayList<>();
|
||||
//
|
||||
// DecimalFormat nFormat = new DecimalFormat("#.##");
|
||||
// DecimalFormatSymbols dfs = new DecimalFormatSymbols();
|
||||
// dfs.setDecimalSeparator('.');
|
||||
// nFormat.setDecimalFormatSymbols(dfs);
|
||||
//
|
||||
//
|
||||
// int nbInterval = 100;
|
||||
// double max = 72178d +1;
|
||||
//
|
||||
// for(int i = 0; i < nbInterval; i++){
|
||||
// String interval = "["+ nFormat.format((max/nbInterval)*i) +";"+ nFormat.format((max/nbInterval)*(i+1)) +"[";
|
||||
// listInterval.add(interval);
|
||||
// mapIntervalCount.put(interval, 0);
|
||||
// }
|
||||
//
|
||||
// for(Integer itemId : mapItemOccurrence.keySet()){
|
||||
// String interval = listInterval.get((int) (mapItemOccurrence.get(itemId)/(max/nbInterval)));
|
||||
//
|
||||
// mapIntervalCount.put(interval, mapIntervalCount.getOrDefault(interval, 0)+1);
|
||||
// }
|
||||
//
|
||||
// try{
|
||||
// BufferedWriter writer = new BufferedWriter(new FileWriter(output));
|
||||
//
|
||||
// writer.write("interval,occurrence");
|
||||
// writer.newLine();
|
||||
//
|
||||
// for(String key : listInterval){
|
||||
// if(!mapIntervalCount.get(key).equals(0)) {
|
||||
// writer.write(key + "," + mapIntervalCount.get(key));
|
||||
// writer.newLine();
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// writer.close();
|
||||
// } catch (IOException e) {
|
||||
// e.printStackTrace();
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
|
@ -0,0 +1,284 @@
|
|||
package main;
|
||||
|
||||
import com.opencsv.CSVReader;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
public class DatasetConverter {
|
||||
|
||||
public static void convertCSVIntoTransaction(String input, String output){
|
||||
try{
|
||||
BufferedReader reader = new BufferedReader(new FileReader(input));
|
||||
BufferedWriter writer = new BufferedWriter(new FileWriter(output));
|
||||
|
||||
String line;
|
||||
int idOrder;
|
||||
int idProduct;
|
||||
|
||||
|
||||
int lastIdOrder = -1;
|
||||
boolean firstTransaction = true;
|
||||
|
||||
// Delete first line with the header.
|
||||
reader.readLine();
|
||||
|
||||
while ((line = reader.readLine()) != null){
|
||||
String[] lines = line.split(",");
|
||||
|
||||
idOrder = Integer.valueOf(lines[0]);
|
||||
idProduct = Integer.valueOf(lines[1]);
|
||||
|
||||
|
||||
if(lastIdOrder != idOrder){
|
||||
if(firstTransaction){
|
||||
firstTransaction = false;
|
||||
} else {
|
||||
writer.write("\n");
|
||||
}
|
||||
|
||||
lastIdOrder = idOrder;
|
||||
}
|
||||
|
||||
writer.write(idProduct + " ");
|
||||
}
|
||||
|
||||
reader.close();
|
||||
writer.close();
|
||||
} catch (IOException exception){
|
||||
exception.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public static void replaceIdByAisle(String product, String dataset, String output){
|
||||
Map<Integer, Integer> mapProductIdAisleId = new HashMap<>();
|
||||
|
||||
try{
|
||||
Set<Integer> setAisleKept = new HashSet<>();
|
||||
BufferedReader reader;// = new BufferedReader(new FileReader(product));
|
||||
BufferedWriter writer;// = new BufferedWriter(new FileWriter(DataExplorer.workingDirectory + "replace.tmp"));
|
||||
String line;
|
||||
int idOrder;
|
||||
int previousIdOrder = -1;
|
||||
int idProduct;
|
||||
boolean firstTransaction = true;
|
||||
|
||||
CSVReader csvreader = new CSVReader(new FileReader(product));
|
||||
String[] nline;
|
||||
|
||||
csvreader.readNext();
|
||||
while ((nline = csvreader.readNext()) != null) {
|
||||
try {
|
||||
mapProductIdAisleId.put(Integer.valueOf(nline[0]), Integer.valueOf(nline[2]));
|
||||
} catch (NumberFormatException e){
|
||||
|
||||
for(String l : nline) {
|
||||
System.out.print(l);
|
||||
if(!l.equals(nline[nline.length-1])) {
|
||||
System.out.print(",");
|
||||
}
|
||||
}
|
||||
System.out.println();
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
reader = new BufferedReader(new FileReader(dataset));
|
||||
writer = new BufferedWriter(new FileWriter(output));
|
||||
|
||||
reader.readLine();
|
||||
|
||||
while ((line = reader.readLine()) != null){
|
||||
String[] lines = line.split(",");
|
||||
|
||||
idOrder = Integer.valueOf(lines[0]);
|
||||
idProduct = Integer.valueOf(lines[1]);
|
||||
|
||||
|
||||
if(previousIdOrder != idOrder){
|
||||
if(firstTransaction){
|
||||
firstTransaction = false;
|
||||
} else {
|
||||
|
||||
if(!setAisleKept.isEmpty()) {
|
||||
for (Integer idAisle : setAisleKept) {
|
||||
writer.write(idAisle.toString() + " ");
|
||||
}
|
||||
writer.write("\n");
|
||||
}
|
||||
|
||||
setAisleKept.clear();
|
||||
}
|
||||
|
||||
previousIdOrder = idOrder;
|
||||
}
|
||||
|
||||
setAisleKept.add(mapProductIdAisleId.get(idProduct));
|
||||
}
|
||||
|
||||
// For the last transaction.
|
||||
if(!setAisleKept.isEmpty()) {
|
||||
for (Integer idAisle : setAisleKept) {
|
||||
writer.write(idAisle.toString() + " ");
|
||||
}
|
||||
writer.write("\n");
|
||||
}
|
||||
|
||||
reader.close();
|
||||
writer.close();
|
||||
} catch (IOException e){
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public static void sortTransaction(String transactionPath){
|
||||
try{
|
||||
BufferedReader reader = new BufferedReader(new FileReader(transactionPath));
|
||||
StringBuilder stringBuilder = new StringBuilder("");
|
||||
String line;
|
||||
|
||||
while ((line = reader.readLine()) != null){
|
||||
String[] linePart = line.split(" ");
|
||||
SortedSet<Integer> sortedSet = new TreeSet<>();
|
||||
|
||||
for(String item : linePart){
|
||||
sortedSet.add(Integer.valueOf(item));
|
||||
}
|
||||
|
||||
for(Integer item_id : sortedSet){
|
||||
stringBuilder.append(item_id);
|
||||
stringBuilder.append(' ');
|
||||
}
|
||||
|
||||
stringBuilder.append('\n');
|
||||
}
|
||||
|
||||
reader.close();
|
||||
BufferedWriter writer = new BufferedWriter(new FileWriter(transactionPath));
|
||||
|
||||
writer.write(stringBuilder.toString());
|
||||
|
||||
writer.close();
|
||||
} catch (IOException e){
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public static void convertCSVIntoSequences(String rawSequenceDataset, String outputSequence, String outputInfo){
|
||||
try{
|
||||
BufferedReader reader = new BufferedReader(new FileReader(rawSequenceDataset));
|
||||
BufferedWriter writerSequence = new BufferedWriter(new FileWriter(outputSequence));
|
||||
BufferedWriter writerInfo = new BufferedWriter(new FileWriter(outputInfo));
|
||||
|
||||
String line;
|
||||
StringBuilder sequenceBuilder = new StringBuilder("");
|
||||
int lineNumber = 0;
|
||||
|
||||
Map<String, Integer> mapNameId = new HashMap<>();
|
||||
int freeID = 1;
|
||||
int lastCustomerID = -1;
|
||||
int lastOrderNumber = -1;
|
||||
boolean firstSequence = true;
|
||||
|
||||
while ((line = reader.readLine()) != null){
|
||||
String[] lineParts = line.split("\t");
|
||||
Integer customerID = Integer.valueOf(lineParts[0]);
|
||||
Integer orderNumber = Integer.valueOf(lineParts[1]);
|
||||
|
||||
if(!customerID.equals(lastCustomerID)){
|
||||
if(firstSequence){
|
||||
firstSequence = false;
|
||||
}else{
|
||||
sequenceBuilder.append("-2");
|
||||
writerSequence.write(sequenceBuilder.toString());
|
||||
writerSequence.newLine();
|
||||
|
||||
sequenceBuilder = new StringBuilder("");
|
||||
}
|
||||
|
||||
lastCustomerID = customerID;
|
||||
} else if(orderNumber <= lastOrderNumber){
|
||||
System.out.println("Line "+ lineNumber +" :c");
|
||||
}
|
||||
|
||||
String[] items = lineParts[3].split(",");
|
||||
for(String item : items){
|
||||
if(item.equals("")){
|
||||
break;
|
||||
}
|
||||
|
||||
Integer itemID = mapNameId.getOrDefault(item, -1);
|
||||
|
||||
if(itemID.equals(-1)){
|
||||
mapNameId.put(item, freeID);
|
||||
itemID = freeID;
|
||||
freeID++;
|
||||
}
|
||||
|
||||
sequenceBuilder.append(itemID);
|
||||
sequenceBuilder.append(' ');
|
||||
}
|
||||
sequenceBuilder.append("-1 ");
|
||||
|
||||
lastOrderNumber = orderNumber;
|
||||
lineNumber++;
|
||||
}
|
||||
|
||||
for(String item : new TreeSet<>(mapNameId.keySet())){
|
||||
writerInfo.write(item);
|
||||
writerInfo.write(',');
|
||||
writerInfo.write(mapNameId.get(item).toString());
|
||||
writerInfo.newLine();
|
||||
}
|
||||
|
||||
reader.close();
|
||||
writerSequence.close();
|
||||
writerInfo.close();
|
||||
} catch (IOException e){
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public static void sortSequences(String sequenceDataset){
|
||||
try{
|
||||
BufferedReader reader = new BufferedReader(new FileReader(sequenceDataset));
|
||||
StringBuilder stringBuilder = new StringBuilder("");
|
||||
String line;
|
||||
|
||||
while ((line = reader.readLine()) != null){
|
||||
String[] linePart = line.split(" -1 ");
|
||||
|
||||
for(String itemset : linePart) {
|
||||
if(!itemset.contains("-2")) {
|
||||
SortedSet<Integer> sortedSet = new TreeSet<>();
|
||||
String[] items = itemset.split(" ");
|
||||
|
||||
for (String item : items) {
|
||||
sortedSet.add(Integer.valueOf(item));
|
||||
}
|
||||
|
||||
for(Integer item_id : sortedSet){
|
||||
stringBuilder.append(item_id);
|
||||
stringBuilder.append(' ');
|
||||
}
|
||||
}
|
||||
|
||||
stringBuilder.append("-1 ");
|
||||
}
|
||||
|
||||
stringBuilder.delete(stringBuilder.lastIndexOf("-1"), stringBuilder.lastIndexOf("-1") +3);
|
||||
|
||||
stringBuilder.append("-2\n");
|
||||
}
|
||||
|
||||
reader.close();
|
||||
BufferedWriter writer = new BufferedWriter(new FileWriter(sequenceDataset));
|
||||
|
||||
writer.write(stringBuilder.toString());
|
||||
|
||||
writer.close();
|
||||
} catch (IOException e){
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue