Group & Then Concatenate Values of a Specified Field

Question

I have a CSV file with the columns “people”, “committers”, and “repositoryCommitters”.

The “people” column holds ids from 1 to 5923, and I want to link the ids that share the same repository in the “repositoryCommitters” column.

The file is something like this:

people | repositoryCommitters

 

1 | x

 

2 | x

 

3 | y

 

People ids 1 and 2 share the repo “x”. How do I find these ids and print them in the output like this:

 

*Edges

 

1 2

 

This means 1 and 2 are linked because they have the common repository.

So far the code I have is:

package network;

 

import java.io.BufferedReader;

import java.io.BufferedWriter;

import java.io.File;

import java.io.FileNotFoundException;

import java.io.FileReader;

import java.io.FileWriter;

import java.io.IOException;

import java.io.LineNumberReader;

import java.io.PrintStream;

import java.io.Writer;

import java.util.ArrayList;

import java.util.Scanner;

 

/**
 * Reads {@code people.csv} from a directory typed in by the user and writes
 * the vertex section of a network file, both to the console and to
 * {@code network.txt} inside that same directory.
 *
 * <p>The first line of {@code people.csv} is treated as a header and skipped.
 * For every remaining row, the second comma-separated column is printed as
 * the vertex label. The {@code *Edges} section is emitted as a header only;
 * the edges themselves are not computed yet.
 */
public class Read {

    // Package-visible state kept from the earlier version so that any other
    // class in the package referring to these names keeps compiling.
    static String line;
    static BufferedReader br1 = null, br2 = null;
    static ArrayList<String> pList = new ArrayList<String>();
    static ArrayList<String> rList = new ArrayList<String>();
    static File fileName = new File("networkBuilder.txt");

    /**
     * Prompts for a directory, loads {@code people.csv} from it and writes the
     * network to the console and to {@code network.txt}.
     *
     * <p>If {@code people.csv} or {@code repo.csv} cannot be opened, a message
     * is printed and the JVM exits with status 1.
     *
     * @param args unused
     * @throws IOException kept in the signature for compatibility; I/O
     *         failures are reported on the console instead of being propagated
     */
    public static void main(String[] args) throws IOException {
        System.out.println("Enter your current directory:");
        // Deliberately not closed: closing the Scanner would close System.in too.
        Scanner scanner = new Scanner(System.in);
        String directory = scanner.nextLine();

        pList.clear();
        br1 = null;
        br2 = null;
        try {
            // File(parent, child) inserts the path separator for us.
            br1 = new BufferedReader(new FileReader(new File(directory, "people.csv")));
            // repo.csv is only opened to verify it exists; its rows are not used yet.
            br2 = new BufferedReader(new FileReader(new File(directory, "repo.csv")));
            readPeople(br1);
        } catch (FileNotFoundException e) {
            System.out.println(e.getMessage() + "\n file not found re-run and try again");
            // System.exit never returns, so the finally block below would not run.
            closeQuietly(br1);
            closeQuietly(br2);
            System.exit(1);
        } catch (IOException error) {
            // Keep going with whatever rows were read before the failure.
            System.out.println(error.getMessage() + "Error reading file");
        } finally {
            closeQuietly(br1);
            closeQuietly(br2);
        }

        // Console copy, separated from the prompt by a blank line.
        System.out.println();
        writeNetwork(System.out);

        // File copy, written directly instead of redirecting System.out.
        File networkFile = new File(directory, "network.txt");
        PrintStream fileOut = null;
        try {
            fileOut = new PrintStream(networkFile);
            writeNetwork(fileOut);
            // PrintStream swallows IOExceptions; checkError() is how they surface.
            if (fileOut.checkError()) {
                System.err.println("Error writing " + networkFile);
            }
        } catch (FileNotFoundException e) {
            System.err.println("Cannot write " + networkFile + ": " + e.getMessage());
        } finally {
            if (fileOut != null) {
                fileOut.close();
            }
        }
    } // end of main

    /** Skips the header line, then stores every row that has a second column in {@code pList}. */
    private static void readPeople(BufferedReader reader) throws IOException {
        reader.readLine(); // header row
        while ((line = reader.readLine()) != null) {
            if (line.split(",").length < 2) {
                // Such a row has no vertex label and would otherwise crash the output loop.
                System.err.println("Skipping row without a second column: " + line);
                continue;
            }
            pList.add(line);
        }
    }

    /** Writes the {@code *Vertices} section followed by the (still empty) {@code *Edges} header. */
    private static void writeNetwork(PrintStream out) {
        out.println("*Vertices " + pList.size());
        for (String row : pList) {
            out.println(row.split(",")[1]);
        }
        out.println();
        out.println("*Edges");
        // TODO: group people by repository and print one edge line per linked pair.
    }

    /** Closes the reader if it was opened; a failure to close is not actionable here. */
    private static void closeQuietly(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if close() fails on a read-only stream.
        }
    }
}

 

And the output is:

Enter your current directory:

C:\Users\StudentDoubts\Documents

*Vertices 5923

 

1

 

2

 

3 . . .

 

Answer

You need to group the rows by the second column and then join the first-column values of each group into one line. Coding this by hand in Java is roundabout and difficult; SPL (Structured Process Language) handles it more simply and efficiently:

 

A

1

=file("people.txt").import@t(;,"|")

2

=A1.group(repositoryCommitters).new(~.(people).concat("   "):*Edges)

3

=file("D:/result.txt").export@t(A2)

If you want to precede each line in the output file with repositoryCommitters, just modify A2 as:

=A1.group(repositoryCommitters).new(~.(people).string(" "):*Edges,repositoryCommitters:repositoryCommitters)

 

esProc provides a JDBC interface that lets a third-party program call an SPL script the same way it would query a database and read the result set. To learn more about the invocation, see “How to Call an SPL Script in Java”.