-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathscraper.js
More file actions
78 lines (66 loc) · 1.84 KB
/
scraper.js
File metadata and controls
78 lines (66 loc) · 1.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env node
const cheerio = require('cheerio');
const ObjectsToCsv = require('objects-to-csv');
const fs = require('fs');
const program = require('commander');
// Parse the command line. The last positional argument names the CSV file
// that is written into the `outputs/` directory.
program.version('0.1.0').parse(process.argv);

const outputName = program.args[program.args.length - 1];
if (outputName === undefined) {
  // Without this guard the script would silently write to "outputs/undefined".
  console.error('Usage: scraper.js <output-file>');
  process.exit(1);
}

// Every entry of the `inputs` directory is treated as an input file.
// The directory is listed relative to this script (__dirname) because the
// files are later read relative to __dirname as well; listing relative to the
// current working directory would break when the script is run from elsewhere.
// Entries are kept as "inputs/<name>" so they can be appended to __dirname.
const inputFiles = fs.readdirSync(`${__dirname}/inputs`).map(file => {
  console.log(file);
  return "inputs/" + file;
});

const outputFile = "outputs/" + outputName;
console.log(outputFile);

// Accumulated person records from all input files (reassigned/extended later).
let Persons = [];
// Cheerio handle for the document currently being scraped; set per input file.
let $ = null;
/**
 * Splits a contact's display text into title, first name and last name.
 *
 * The text is trimmed and split on runs of whitespace, so leading/trailing
 * padding, newlines or doubled spaces do not shift the parts into the wrong
 * fields. Only the first three tokens are used; anything after is ignored.
 *
 * @param {string} text - Raw name text, expected as "<title> <first> <last>".
 * @returns {{title: string, firstName: (string|undefined), lastName: (string|undefined)}}
 *   The parsed parts. Parts that are not present are `undefined`; blank input
 *   yields an empty-string title.
 */
function GetName(text) {
  const [title, firstName, lastName] = text.trim().split(/\s+/);
  return { title, firstName, lastName };
}
/**
 * Builds a person record from one row of the search-results table in the
 * currently loaded document (the module-level `$`).
 *
 * Cells are addressed by element id `search-results-data-table-row-<row>-cell-<n>`:
 * cell 3 is read as the company, 4 as the position, 7 as the name
 * ("<title> <first> <last>", parsed by GetName) and 10 as the email.
 * Each value is trimmed so markup whitespace does not leak into the output.
 * The record is also logged to the console.
 *
 * @param {number} row - Zero-based row index within the results table.
 * @returns {{title: string, firstName: (string|undefined), lastName: (string|undefined),
 *   email: string, position: string, company: string}} The scraped record.
 *   NOTE(review): for a row that does not exist the text fields are expected
 *   to be empty strings (cheerio `.text()` on an empty selection) — confirm.
 */
function ScrapePerson(row) {
  // Trimmed text of the first element matching the given cell of this row.
  const cellText = (cell) =>
    $(`#search-results-data-table-row-${row}-cell-${cell}`)
      .first()
      .text()
      .trim();

  const company = cellText(3);
  const position = cellText(4);
  // Parse the name once instead of once per field.
  const { title, firstName, lastName } = GetName(cellText(7));
  const email = cellText(10);

  // Property order is kept as-is: it determines the CSV column order.
  const person = {
    title,
    firstName,
    lastName,
    email,
    position,
    company,
  };
  console.log(person);
  return person;
}
// Upper bound on the number of table rows scanned in each input file.
const MAX_ROWS_PER_FILE = 300;

// Scrape every input file and collect the rows that carry an email address.
inputFiles.forEach(inputFile => {
  const html = fs.readFileSync(`${__dirname}/${inputFile}`, 'utf8');
  // ScrapePerson reads from the module-level `$`, so point it at this file.
  $ = cheerio.load(html);
  for (let row = 0; row < MAX_ROWS_PER_FILE; row++) {
    const person = ScrapePerson(row);
    // Kept for compatibility: stop early if the scraper ever signals "no row".
    if (person == null) break;
    // Rows without an email are dropped. Filtering here, instead of
    // re-filtering the whole accumulated list after every file, keeps the
    // same final result.
    if (person.email.trim()) {
      Persons.push(person);
    }
  }
});

// toDisk is asynchronous; report a failed write instead of leaving the
// returned promise floating as an unhandled rejection.
const csv = new ObjectsToCsv(Persons);
csv.toDisk(outputFile).catch(err => {
  console.error(`Failed to write ${outputFile}:`, err);
  process.exitCode = 1;
});