FreelanceBot
273 строки · 10.7 Кб
1package telegramBot.task;2
3
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.apache.commons.lang3.StringEscapeUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;

import telegramBot.dto.OrderDto;
import telegramBot.entity.Order;
import telegramBot.enums.Exchange;
import telegramBot.enums.HttpMethod;
import telegramBot.enums.Language;

21
22@Component
23public class ExchangeParser {24private static final String HABR_SELECTOR = ".task__column_desc";25private static final String FL_SELECTOR = ".search-item-body";26private static final Map<String, String> habrLinks = new HashMap<>();27private static final Map<String, String> flLinks = new HashMap<>();28private static final Map<String, String> kworkLinks = new HashMap<>();29
30
31static {32habrLinks.put(Language.JAVA.getName(), habrLik(Language.JAVA));33habrLinks.put(Language.PYTHON.getName(), habrLik(Language.PYTHON));34habrLinks.put(Language.JAVASCRIPT.getName(), habrJavaScriptLink());35habrLinks.put(Language.PHP.getName(), habrLik(Language.PHP));36habrLinks.put(Language.C.getName(), habrLik(Language.C));37habrLinks.put(Language.RUBY.getName(), habrLik(Language.RUBY));38
39flLinks.put(Language.JAVA.getName(), flLink(Language.JAVA));40flLinks.put(Language.PYTHON.getName(), flLink(Language.PYTHON));41flLinks.put(Language.JAVASCRIPT.getName(), flJavaScriptLink());42flLinks.put(Language.PHP.getName(), flLink(Language.PHP));43flLinks.put(Language.C.getName(), flLink(Language.C));44flLinks.put(Language.RUBY.getName(), flLink(Language.RUBY));45
46kworkLinks.put(Language.JAVA.getName(), kworkLink(Language.JAVA));47kworkLinks.put(Language.PYTHON.getName(), kworkLink(Language.PYTHON));48kworkLinks.put(Language.JAVASCRIPT.getName(), kworkJavaScriptLink());49kworkLinks.put(Language.PHP.getName(), kworkLink(Language.PHP));50kworkLinks.put(Language.C.getName(), kworkLink(Language.C));51kworkLinks.put(Language.RUBY.getName(), kworkLink(Language.RUBY));52kworkLinks.put(Language.PHP.getName(), kworkLink(Language.PHP));53}54
55
56public Map<Exchange, List<Order>> getOrders(Language language) {57Map<Exchange, List<Order>> exchangeOrders = new HashMap<>();58exchangeOrders.put(Exchange.HABR_FREELANCE, getHabrOrders(language));59exchangeOrders.put(Exchange.FL_RU, getFlOrders(language));60exchangeOrders.put(Exchange.KWORK, getKworkOrders(language));61
62return exchangeOrders;63
64}65
66private List<Order> getHabrOrders(Language language) {67List<Order> orders = new ArrayList<>();68for(String link : habrLinks.get(language.getName()).split("\\|")) {69Document document = getDocument(link);70Elements elements = document.select(HABR_SELECTOR);71for (Element e : elements) {72Element titleElement = e.child(0).child(0).child(0);73String taskTitle = titleElement.text();74String taskLink = titleElement.attr("href");75String taskTags = extractTags(e);76
77OrderDto dto = new OrderDto(taskTitle, taskLink, taskTags);78if(language == Language.JAVA && OrderQueryRelation.falseJavaPattern(dto)) continue;79if(OrderQueryRelation.correctRelation(dto, language) == language){80orders.add(dto.toEntity());81}82}83}84
85return orders;86}87
88private List<Order> getFlOrders(Language language) {89List<Order> orders = new ArrayList<>();90for (String link : flLinks.get(language.getName()).split("\\|")) {91Document document = getDocument(link);92Elements elements = document.select(FL_SELECTOR);93for (Element e : elements) {94String taskTitle = trimHtml(e.child(1).child(0).text());95String taskLink = e.child(1).child(0).attr("href");96String taskDescription = trimHtml(e.child(2).text());97
98OrderDto dto = new OrderDto(taskTitle, taskLink, taskDescription);99if(language == Language.JAVA && OrderQueryRelation.falseJavaPattern(dto)) continue;100if (OrderQueryRelation.correctRelation(dto, language) == language) {101orders.add(dto.toEntity());102}103}104}105return orders;106}107
108private List<Order> getKworkOrders(Language language) {109List<Order> orders = new ArrayList<>();110for (String link : kworkLinks.get(language.getName()).split("\\|")) {111String kworkJson = getJSON(link, HttpMethod.POST);112List<Order> filteredOrders = extractKworkOrders(kworkJson).stream().filter(order -> {113if (language.equals(Language.JAVA)) {114return !OrderQueryRelation.falseJavaPattern(order) &&115OrderQueryRelation.correctRelation(order, language) == language;116}117return OrderQueryRelation.correctRelation(order, language) == language;118})119.map(OrderDto::toEntity)120.collect(Collectors.toList());121orders.addAll(filteredOrders);122}123return orders;124}125
126public Document getDocument(String link) {127Document document = null;128try {129document = SSLHelper.getConnection(link).get();130} catch (IOException e) {131Throwable cause = e.getCause();132if(cause != null) System.out.println(cause.getMessage());133}134return document;135}136
137private String trimHtml(String html) {138return html.replaceAll("(<em>)", "").139replaceAll("(</em>)", "");140}141
142public String getJSON(String link, HttpMethod httpMethod) {143HttpURLConnection c = null;144try {145URL u = new URL(link);146c = (HttpURLConnection) u.openConnection();147c.setRequestMethod(httpMethod.getMethodName());148c.setRequestProperty("Content-length", "0");149c.setRequestProperty("Content-Type", "application/json");150c.setUseCaches(false);151c.setAllowUserInteraction(false);152c.connect();153int status = c.getResponseCode();154
155switch (status) {156case 200:157case 201:158BufferedReader br = new BufferedReader(new InputStreamReader(c.getInputStream()));159StringBuilder sb = new StringBuilder();160String line;161while ((line = br.readLine()) != null) {162sb.append(line).append("\n");163}164br.close();165return sb.toString();166}167
168} catch (IOException e) {169System.out.println(e.getCause().getMessage());170} finally {171if (c != null) {172c.disconnect();173}174}175return null;176}177
178private List<OrderDto> extractKworkOrders(String json){179if(json == null) return new ArrayList<>();180return Arrays.stream(json.181split("(\\{|\\})")).182filter(this :: filterCondition).183map(StringEscapeUtils::unescapeJava).184map(this :: mapToKworkOrder).185collect(Collectors.toList());186}187
188private boolean filterCondition(String obj) {189String idPat = "(\"id\")(:)\\d{7}";190String langPat = "(\"lang\")(:)" + "\"" +"[a-z]{2}"+ "\"";191int index = obj.indexOf(",");192if(index != -1 && obj.substring(0, index).193matches(idPat)){194return obj.split(",")[1].matches(langPat);195}196return false;197}198
199private OrderDto mapToKworkOrder(String json){200String idPrefix = "\"id\"", namePrefix = "\"name\"", descPrefix = "\"description\"";201String title = null, link = null, description = null;202String[] fields = json.split("(,\"|\",)");203int index = 0;204while(index != fields.length){205String field = fields[index];206if(link != null && title != null && description != null) break;207
208if (field.startsWith(idPrefix)) {209link = "/projects/" + field.substring(field.indexOf(":") + 1);210}211
212if (field.startsWith(namePrefix)) {213title = field.substring(field.indexOf(":") + 1).214replaceAll("\"", "").trim();215}216
217if(field.startsWith(descPrefix)){218int subIndex = field.indexOf(":") + 1;219description = field.substring(subIndex).220replaceAll("\"", "").trim();221}222index ++ ;223
224}225
226
227return new OrderDto(title, link, description);228}229
230private String extractTags(Element element){231Elements elements = element.child(1).child(0).children();232StringBuilder sb = new StringBuilder();233for(Element e : elements){234sb.append(e.text()).append(",");235}236return sb.toString();237}238
239private static String habrLik(Language language){240String link = "https://freelance.habr.com/tasks?page=1&q=lang&fields=tags";241return link.replaceAll("(lang)", language.getName().toLowerCase());242}243
244private static String habrJavaScriptLink(){245return "https://freelance.habr.com/tasks?page=1&q=javascript&fields=tags|" +246"https://freelance.habr.com/tasks?page=1&q=java%20script&fields=tags|" +247"https://freelance.habr.com/tasks?page=1&q=js&fields=tags";248}249
250private static String flLink(Language language){251String link = "https://www.fl.ru/search/?action=search&type=projects&search_string=lang&page=1";252return link.replaceAll("(lang)", language.getName().toLowerCase());253}254
255private static String flJavaScriptLink(){256return "https://www.fl.ru/search/?action=search&type=projects&search_string=javascript&page=1|" +257"https://www.fl.ru/search/?action=search&type=projects&search_string=java%20script&page=1|" +258"https://www.fl.ru/search/?action=search&type=projects&search_string=js&page=1";259}260
261private static String kworkLink(Language language){262String link = "https://kwork.ru/projects?keyword=lang&a=1.json";263return link.replaceAll("(lang)", language.getName()).toLowerCase();264}265
266private static String kworkJavaScriptLink(){267return "https://kwork.ru/projects?keyword=javascript&a=1.json|" +268"https://kwork.ru/projects?keyword=java+script&a=1.json|" +269"https://kwork.ru/projects?keyword=js&a=1.json";270}271
272
273}
274