반응형
package kr.samdogs.study.func.pojo;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Locale;
import java.util.Map;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * Counts how often each word occurs in a text file and prints the resulting map.
 *
 * <p>Despite its name, this class counts words, not lines. The work is done as a single
 * stream pipeline: strip punctuation, split on whitespace, keep word-like tokens,
 * lower-case them, and merge them into a {@code word -> count} map.
 */
public class LineCount {

    /** File analysed when no path is passed on the command line. */
    private static final String DEFAULT_FILE = "LineCount.java";

    /**
     * Reads a file line by line and prints the number of occurrences of every word to
     * standard output. Read failures are reported on standard error instead of being thrown.
     *
     * @param args optional; {@code args[0]} is the path of the file to analyse. When absent,
     *             {@value #DEFAULT_FILE} is resolved against the working directory.
     */
    public static void main(String[] args) {
        /* Word count, functional-style Java */
        Path location = Paths.get(args != null && args.length > 0 ? args[0] : DEFAULT_FILE);

        // Punctuation pattern; without UNICODE_CHARACTER_CLASS this covers ASCII punctuation only.
        Pattern punctuation = Pattern.compile("\\p{Punct}");
        Pattern whitespace = Pattern.compile("\\s+");
        // ASCII letters, digits and underscore; tokens in other scripts do not match.
        Pattern words = Pattern.compile("\\w+");

        try (Stream<String> stream = Files.lines(location)) {
            Map<String, Integer> wordCount =
                    stream.map(punctuation::matcher)
                            .map(matcher -> matcher.replaceAll("")) // remove punctuation
                            .map(whitespace::split)
                            .flatMap(Arrays::stream) // flatten the per-line arrays into one stream of tokens
                            // Drop empty tokens (blank lines, leading whitespace) and non-\w+ tokens.
                            .filter(word -> words.matcher(word).matches())
                            // Locale.ROOT keeps the keys stable regardless of the default locale
                            // (e.g. Turkish would otherwise map 'I' to a dotless 'ı').
                            .map(word -> word.toLowerCase(Locale.ROOT))
                            // Every occurrence contributes 1; equal words are merged by summing.
                            .collect(Collectors.toMap(Function.identity(),
                                    word -> 1,
                                    Integer::sum));
            System.out.println(wordCount);
        } catch (IOException | UncheckedIOException e) {
            // Files.lines reads lazily, so failures while traversing the stream (including
            // undecodable bytes) arrive as UncheckedIOException rather than IOException.
            System.err.println("Could not read " + location + ": " + e);
        }
    }
}
단계별로 확인하려면 파이프라인 중간중간에서 collect 한 결과를 출력해 보면 된다!
1. 먼저 Files.lines 로 파일을 stream 으로 바꾸면, 파일의 한 줄(라인)이 하나의 요소로 들어오게 된다.
// Collect the untouched stream: each element is one line of the file.
System.out.println(
stream.collect(Collectors.toList()).toString()
);
// Output
[package kr.samdogs.study.func.pojo;, , import java.io.IOException;, import java.nio.file.Files;, import java.nio.file.Path;, import java.nio.file.Paths;, import java.util.Arrays;, import java.util.Map;, import java.util.function.Function;, import java.util.regex.Pattern;, import java.util.stream.Collectors;, import java.util.stream.Stream;, , public class LineCount {, , public static void main(String[] args) {, /* 단어카운트 함수형 자바 */, , Path location = Paths.get("LineCount.java");, , //구두점 문구 패턴, Pattern punctuation = Pattern.compile("\\p{Punct}");, Pattern whitespace = Pattern.compile("\\s+");, Pattern words = Pattern.compile("\\w+");, , try(Stream<String> stream = Files.lines(location)){, , Map<String, Integer> wordCount = , stream.map(punctuation::matcher), .map(matcher -> matcher.replaceAll("")) //구두점 삭제, .map(whitespace::split), .flatMap(Arrays::stream) // 공백 기준 단어 분할, .filter(word -> words.matcher(word).matches()) //같은단어끼리 모으기??, .map(String::toLowerCase) //표준화(모두소문자), .collect(Collectors.toMap(Function.identity(), //단어세기, word -> 1,, Integer::sum));, System.out.println(wordCount);, }catch(IOException e) {, System.err.print(e.getMessage()); , }, , }, , }]
2. 각 라인에서 구두점(, ; . 등)을 모두 삭제한다.
// Replace every punctuation match in each line with the empty string, then collect the lines.
System.out.println(
stream.map(punctuation::matcher)
.map(matcher -> matcher.replaceAll(""))
.collect(Collectors.toList()).toString()
);
// Output
[package krsamdogsstudyfuncpojo, , import javaioIOException, import javaniofileFiles, import javaniofilePath, import javaniofilePaths, import javautilArrays, import javautilMap, import javautilfunctionFunction, import javautilregexPattern, import javautilstreamCollectors, import javautilstreamStream, , public class LineCount , , public static void mainString args , 단어카운트 함수형 자바 , , Path location PathsgetLineCountjava, , 구두점 문구 패턴, Pattern punctuation PatterncompilepPunct, Pattern whitespace Patterncompiles, Pattern words Patterncompilew, , tryStreamString stream Fileslineslocation, , MapString Integer wordCount , streammappunctuationmatcher, mapmatcher matcherreplaceAll 구두점 삭제, mapwhitespacesplit, flatMapArraysstream 공백 기준 단어 분할, filterword wordsmatcherwordmatches 같은단어끼리 모으기, mapStringtoLowerCase 표준화모두소문자, collectCollectorstoMapFunctionidentity 단어세기, word 1, Integersum, SystemoutprintlnwordCount, catchIOException e , SystemerrprintegetMessage , , , , , ]
3. 그리고 공백 기준으로 스플릿한다! 하지만 이렇게만 하면 각 라인이 String 배열(String[])이 되어 Stream<String[]> 이 되므로, flatMap 을 이용해서 2depth 구조를 1depth(Stream<String>)로 평탄화해 준다.
// Split each punctuation-free line on whitespace; every element is now a String[].
System.out.println(
stream.map(punctuation::matcher)
.map(matcher -> matcher.replaceAll(""))
.map(whitespace::split)
.collect(Collectors.toList()).toString()
);
// Output - the split worked, but each line became a String[], so only array references are printed.
[[Ljava.lang.String;@568db2f2, [Ljava.lang.String;@378bf509, [Ljava.lang.String;@5fd0d5ae, [Ljava.lang.String;@2d98a335, [Ljava.lang.String;@16b98e56, [Ljava.lang.String;@7ef20235, [Ljava.lang.String;@27d6c5e0, [Ljava.lang.String;@4f3f5b24, [Ljava.lang.String;@15aeb7ab, [Ljava.lang.String;@7b23ec81, [Ljava.lang.String;@6acbcfc0, [Ljava.lang.String;@5f184fc6, [Ljava.lang.String;@3feba861, [Ljava.lang.String;@5b480cf9, [Ljava.lang.String;@6f496d9f, [Ljava.lang.String;@723279cf, [Ljava.lang.String;@10f87f48, [Ljava.lang.String;@b4c966a, [Ljava.lang.String;@2f4d3709, [Ljava.lang.String;@4e50df2e, [Ljava.lang.String;@1d81eb93, [Ljava.lang.String;@7291c18f, [Ljava.lang.String;@34a245ab, [Ljava.lang.String;@7cc355be, [Ljava.lang.String;@6e8cf4c6, [Ljava.lang.String;@12edcd21, [Ljava.lang.String;@34c45dca, [Ljava.lang.String;@52cc8049, [Ljava.lang.String;@5b6f7412, [Ljava.lang.String;@27973e9b, [Ljava.lang.String;@312b1dae, [Ljava.lang.String;@7530d0a, [Ljava.lang.String;@27bc2616, [Ljava.lang.String;@3941a79c, [Ljava.lang.String;@506e1b77, [Ljava.lang.String;@4fca772d, [Ljava.lang.String;@9807454, [Ljava.lang.String;@3d494fbf, [Ljava.lang.String;@1ddc4ec2, [Ljava.lang.String;@133314b, [Ljava.lang.String;@b1bc7ed, [Ljava.lang.String;@7cd84586, [Ljava.lang.String;@30dae81, [Ljava.lang.String;@1b2c6ec2, [Ljava.lang.String;@4edde6e5]
// Same as above, but flatMap merges the per-line arrays into a single stream of tokens.
System.out.println(
stream.map(punctuation::matcher)
.map(matcher -> matcher.replaceAll(""))
.map(whitespace::split)
.flatMap(Arrays::stream) // 2 levels (stream of arrays) -> 1 level (stream of tokens)
.collect(Collectors.toList()).toString()
);
// Output - flattened by flatMap
[package, krsamdogsstudyfuncpojo, , import, javaioIOException, import, javaniofileFiles, import, javaniofilePath, import, javaniofilePaths, import, javautilArrays, import, javautilMap, import, javautilfunctionFunction, import, javautilregexPattern, import, javautilstreamCollectors, import, javautilstreamStream, , public, class, LineCount, , , public, static, void, mainString, args, , 단어카운트, 함수형, 자바, , Path, location, PathsgetLineCountjava, , 구두점, 문구, 패턴, , Pattern, punctuation, PatterncompilepPunct, , Pattern, whitespace, Patterncompiles, , Pattern, words, Patterncompilew, , tryStreamString, stream, Fileslineslocation, , MapString, Integer, wordCount, , streammappunctuationmatcher, , mapmatcher, matcherreplaceAll, 구두점, 삭제, , mapwhitespacesplit, , flatMapArraysstream, 공백, 기준, 단어, 분할, , filterword, wordsmatcherwordmatches, 같은단어끼리, 모으기, , mapStringtoLowerCase, 표준화모두소문자, , collectCollectorstoMapFunctionidentity, 단어세기, , word, 1, , Integersum, , SystemoutprintlnwordCount, , catchIOException, e, , SystemerrprintegetMessage, , , ]
4. 영문자·숫자·밑줄(\w+)로만 이루어진 단어만 남기고, 표준화(소문자화)를 한 후, 단어를 key, 등장 횟수를 value 로 하는 단어 카운트 맵을 완성한다.
// Full pipeline: filter word-like tokens, lower-case them, and count occurrences per word.
System.out.println(
stream.map(punctuation::matcher)
.map(matcher -> matcher.replaceAll(""))
.map(whitespace::split)
.flatMap(Arrays::stream)
.filter(word -> words.matcher(word).matches()) // keep only tokens that fully match \w+ (drops empty and non-matching tokens)
.map(String::toLowerCase) // normalize: lower-case everything
.collect(Collectors.toMap(Function.identity(), // key: the word itself
word -> 1, // value: each occurrence counts as 1
Integer::sum)) // merge equal words by summing their counts
);
// Output
{wordcount=1, mapmatcher=1, integer=1, javaniofilepath=1, wordsmatcherwordmatches=1, path=1, catchioexception=1, integersum=1, javaioioexception=1, trystreamstring=1, void=1, static=1, package=1, javautilstreamcollectors=1, patterncompiles=1, javautilmap=1, 1=1, linecount=1, patterncompilew=1, javautilfunctionfunction=1, patterncompileppunct=1, word=1, krsamdogsstudyfuncpojo=1, streammappunctuationmatcher=1, import=10, javaniofilefiles=1, javautilregexpattern=1, pattern=3, javautilarrays=1, public=2, stream=1, javaniofilepaths=1, pathsgetlinecountjava=1, systemoutprintlnwordcount=1, flatmaparraysstream=1, mainstring=1, class=1, e=1, mapwhitespacesplit=1, words=1, mapstring=1, fileslineslocation=1, matcherreplaceall=1, args=1, systemerrprintegetmessage=1, javautilstreamstream=1, collectcollectorstomapfunctionidentity=1, punctuation=1, mapstringtolowercase=1, location=1, filterword=1, whitespace=1}
이렇게 사고하는 게 쉽진 않지만, 이해만 한다면 꽤 깔끔한 코드라고 생각된다~~!
노력노력!
반응형