FreelanceBot

Форк
0
/
ExchangeParser.java 
273 строки · 10.7 Кб
1
package telegramBot.task;
2

3

4
import org.apache.commons.lang3.StringEscapeUtils;
5
import telegramBot.dto.OrderDto;
6
import telegramBot.entity.Order;
7
import telegramBot.enums.Exchange;
8
import telegramBot.enums.HttpMethod;
9
import telegramBot.enums.Language;
10
import org.jsoup.nodes.Document;
11
import org.jsoup.nodes.Element;
12
import org.jsoup.select.Elements;
13
import org.springframework.stereotype.Component;
14

15
import java.io.*;
16
import java.net.HttpURLConnection;
17
import java.net.URL;
18
import java.util.*;
19
import java.util.stream.Collectors;
20

21

22
@Component
23
public class ExchangeParser {
24
    /** CSS selector applied to a Habr Freelance search page to pick the task elements. */
    private static final String HABR_SELECTOR = ".task__column_desc";
    /** CSS selector applied to an FL.ru search page to pick the result elements. */
    private static final String FL_SELECTOR = ".search-item-body";

    // Per-exchange lookup tables: language name -> search URL(s).
    // Several URLs for one language are stored in a single string separated by '|'.
    private static final Map<String, String> habrLinks = new HashMap<>();
    private static final Map<String, String> flLinks = new HashMap<>();
    private static final Map<String, String> kworkLinks = new HashMap<>();

    static {
        // Languages whose search URL comes from the generic per-exchange templates.
        // Each language is registered exactly once per exchange.
        List<Language> templatedLanguages = Arrays.asList(
                Language.JAVA, Language.PYTHON, Language.PHP, Language.C, Language.RUBY);
        for (Language language : templatedLanguages) {
            String name = language.getName();
            habrLinks.put(name, habrLik(language));
            flLinks.put(name, flLink(language));
            kworkLinks.put(name, kworkLink(language));
        }

        // JavaScript is searched under several spellings, so it uses dedicated multi-URL links.
        String javaScript = Language.JAVASCRIPT.getName();
        habrLinks.put(javaScript, habrJavaScriptLink());
        flLinks.put(javaScript, flJavaScriptLink());
        kworkLinks.put(javaScript, kworkJavaScriptLink());
    }
54

55

56
    public Map<Exchange, List<Order>> getOrders(Language language) {
57
        Map<Exchange, List<Order>> exchangeOrders = new HashMap<>();
58
        exchangeOrders.put(Exchange.HABR_FREELANCE, getHabrOrders(language));
59
        exchangeOrders.put(Exchange.FL_RU, getFlOrders(language));
60
        exchangeOrders.put(Exchange.KWORK, getKworkOrders(language));
61

62
        return exchangeOrders;
63

64
    }
65

66
    private List<Order> getHabrOrders(Language language) {
67
        List<Order> orders = new ArrayList<>();
68
        for(String link : habrLinks.get(language.getName()).split("\\|")) {
69
            Document document = getDocument(link);
70
            Elements elements = document.select(HABR_SELECTOR);
71
            for (Element e : elements) {
72
                Element titleElement = e.child(0).child(0).child(0);
73
                String taskTitle = titleElement.text();
74
                String taskLink = titleElement.attr("href");
75
                String taskTags = extractTags(e);
76

77
                OrderDto dto = new OrderDto(taskTitle, taskLink, taskTags);
78
                if(language == Language.JAVA && OrderQueryRelation.falseJavaPattern(dto)) continue;
79
                if(OrderQueryRelation.correctRelation(dto, language) == language){
80
                    orders.add(dto.toEntity());
81
                }
82
            }
83
        }
84

85
        return orders;
86
    }
87

88
    private List<Order> getFlOrders(Language language) {
89
        List<Order> orders = new ArrayList<>();
90
        for (String link : flLinks.get(language.getName()).split("\\|")) {
91
            Document document = getDocument(link);
92
            Elements elements = document.select(FL_SELECTOR);
93
            for (Element e : elements) {
94
                String taskTitle = trimHtml(e.child(1).child(0).text());
95
                String taskLink = e.child(1).child(0).attr("href");
96
                String taskDescription = trimHtml(e.child(2).text());
97

98
                OrderDto dto = new OrderDto(taskTitle, taskLink, taskDescription);
99
                if(language == Language.JAVA && OrderQueryRelation.falseJavaPattern(dto)) continue;
100
                if (OrderQueryRelation.correctRelation(dto, language) == language) {
101
                    orders.add(dto.toEntity());
102
                }
103
            }
104
        }
105
        return orders;
106
    }
107

108
    private List<Order> getKworkOrders(Language language) {
109
        List<Order> orders = new ArrayList<>();
110
        for (String link : kworkLinks.get(language.getName()).split("\\|")) {
111
            String kworkJson = getJSON(link, HttpMethod.POST);
112
            List<Order> filteredOrders = extractKworkOrders(kworkJson).stream().filter(order -> {
113
                if (language.equals(Language.JAVA)) {
114
                    return !OrderQueryRelation.falseJavaPattern(order) &&
115
                            OrderQueryRelation.correctRelation(order, language) == language;
116
                }
117
                return OrderQueryRelation.correctRelation(order, language) == language;
118
            })
119
                    .map(OrderDto::toEntity)
120
                    .collect(Collectors.toList());
121
            orders.addAll(filteredOrders);
122
        }
123
    return orders;
124
    }
125

126
    public Document getDocument(String link) {
127
        Document document = null;
128
        try {
129
            document = SSLHelper.getConnection(link).get();
130
        } catch (IOException e) {
131
            Throwable cause = e.getCause();
132
            if(cause != null) System.out.println(cause.getMessage());
133
        }
134
        return document;
135
    }
136

137
    private String trimHtml(String html) {
138
        return html.replaceAll("(<em>)", "").
139
                replaceAll("(</em>)", "");
140
    }
141

142
    public String getJSON(String link, HttpMethod httpMethod) {
143
        HttpURLConnection c = null;
144
        try {
145
            URL u = new URL(link);
146
            c = (HttpURLConnection) u.openConnection();
147
            c.setRequestMethod(httpMethod.getMethodName());
148
            c.setRequestProperty("Content-length", "0");
149
            c.setRequestProperty("Content-Type", "application/json");
150
            c.setUseCaches(false);
151
            c.setAllowUserInteraction(false);
152
            c.connect();
153
            int status = c.getResponseCode();
154

155
            switch (status) {
156
                case 200:
157
                case 201:
158
                    BufferedReader br = new BufferedReader(new InputStreamReader(c.getInputStream()));
159
                    StringBuilder sb = new StringBuilder();
160
                    String line;
161
                    while ((line = br.readLine()) != null) {
162
                        sb.append(line).append("\n");
163
                    }
164
                    br.close();
165
                    return sb.toString();
166
            }
167

168
        } catch (IOException e) {
169
            System.out.println(e.getCause().getMessage());
170
        } finally {
171
            if (c != null) {
172
                c.disconnect();
173
            }
174
        }
175
        return null;
176
    }
177

178
    private List<OrderDto> extractKworkOrders(String json){
179
        if(json == null) return new ArrayList<>();
180
        return Arrays.stream(json.
181
                split("(\\{|\\})")).
182
                filter(this :: filterCondition).
183
                map(StringEscapeUtils::unescapeJava).
184
                map(this :: mapToKworkOrder).
185
                collect(Collectors.toList());
186
    }
187

188
    private boolean filterCondition(String obj) {
189
        String idPat = "(\"id\")(:)\\d{7}";
190
        String langPat = "(\"lang\")(:)" + "\"" +"[a-z]{2}"+ "\"";
191
        int index = obj.indexOf(",");
192
        if(index != -1 && obj.substring(0, index).
193
                matches(idPat)){
194
            return obj.split(",")[1].matches(langPat);
195
        }
196
        return false;
197
    }
198

199
    private OrderDto mapToKworkOrder(String json){
200
        String idPrefix = "\"id\"", namePrefix = "\"name\"", descPrefix = "\"description\"";
201
        String title = null, link = null, description = null;
202
        String[] fields = json.split("(,\"|\",)");
203
        int index = 0;
204
        while(index != fields.length){
205
            String field = fields[index];
206
            if(link != null && title != null && description != null) break;
207

208
            if (field.startsWith(idPrefix)) {
209
                link = "/projects/" + field.substring(field.indexOf(":") + 1);
210
            }
211

212
            if (field.startsWith(namePrefix)) {
213
                title = field.substring(field.indexOf(":") + 1).
214
                        replaceAll("\"", "").trim();
215
            }
216

217
            if(field.startsWith(descPrefix)){
218
                int subIndex = field.indexOf(":") + 1;
219
                description = field.substring(subIndex).
220
                        replaceAll("\"", "").trim();
221
            }
222
            index ++ ;
223

224
        }
225

226

227
    return new OrderDto(title, link, description);
228
    }
229

230
    private String extractTags(Element element){
231
        Elements elements = element.child(1).child(0).children();
232
        StringBuilder sb = new StringBuilder();
233
        for(Element e : elements){
234
            sb.append(e.text()).append(",");
235
        }
236
    return sb.toString();
237
    }
238

239
    private static String habrLik(Language language){
240
        String link = "https://freelance.habr.com/tasks?page=1&q=lang&fields=tags";
241
        return link.replaceAll("(lang)", language.getName().toLowerCase());
242
    }
243

244
    private static String habrJavaScriptLink(){
245
        return "https://freelance.habr.com/tasks?page=1&q=javascript&fields=tags|" +
246
                "https://freelance.habr.com/tasks?page=1&q=java%20script&fields=tags|" +
247
                "https://freelance.habr.com/tasks?page=1&q=js&fields=tags";
248
    }
249

250
    private static String flLink(Language language){
251
        String link = "https://www.fl.ru/search/?action=search&type=projects&search_string=lang&page=1";
252
        return link.replaceAll("(lang)", language.getName().toLowerCase());
253
    }
254

255
    private static String flJavaScriptLink(){
256
        return "https://www.fl.ru/search/?action=search&type=projects&search_string=javascript&page=1|" +
257
                "https://www.fl.ru/search/?action=search&type=projects&search_string=java%20script&page=1|" +
258
                "https://www.fl.ru/search/?action=search&type=projects&search_string=js&page=1";
259
    }
260

261
    private static String kworkLink(Language language){
262
        String link = "https://kwork.ru/projects?keyword=lang&a=1.json";
263
        return link.replaceAll("(lang)", language.getName()).toLowerCase();
264
    }
265

266
    private static String kworkJavaScriptLink(){
267
        return "https://kwork.ru/projects?keyword=javascript&a=1.json|" +
268
                "https://kwork.ru/projects?keyword=java+script&a=1.json|" +
269
                "https://kwork.ru/projects?keyword=js&a=1.json";
270
    }
271

272

273
}
274

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.