CS106B Assignment2

Foreword

记录2022 winter的CS106B Assignment2

Rosetta Stone

利用trigram来判断一段文本属于哪种语言

算是对decomposition的训练

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
/**
 * Counts how often every substring of length kGramLength occurs in str.
 *
 * @param str          text to scan
 * @param kGramLength  length of each k-gram; must be greater than zero
 * @return map from each k-gram found in str to its number of occurrences;
 *         empty when str is shorter than kGramLength
 *
 * Calls error() when kGramLength is not positive.
 */
Map<string, double> kGramsIn(const string& str, int kGramLength) {
    if (kGramLength <= 0) {
        error("kGramLength should be greater than zero.");
    }

    Map<string, double> res;
    // Work in signed arithmetic so a text shorter than kGramLength yields a
    // negative last start index and the loop simply does not run.
    const int lastStart = static_cast<int>(str.length()) - kGramLength;
    for (int start = 0; start <= lastStart; ++start) {
        res[str.substr(start, kGramLength)] += 1;
    }
    return res;
}

/**
 * Scales every value of input by the same factor so that the squares of the
 * resulting values sum to one.
 *
 * @param input  map of keys to numeric weights
 * @return a new map with the same keys and each value divided by the
 *         square root of the sum of squared input values
 *
 * Calls error() when the sum of squares is exactly zero (this includes an
 * empty map), because there is nothing to scale.
 */
Map<string, double> normalize(const Map<string, double>& input) {
    double squareSum = 0.0;
    for (const string& key : input) {
        const double value = input[key];
        squareSum += value * value;
    }
    if (squareSum == 0.0) {
        error("The input map is meaningless.");
    }

    // The norm is the same for every entry, so compute it once.
    const double norm = sqrt(squareSum);
    Map<string, double> res;
    for (const string& key : input) {
        res[key] = input[key] / norm;
    }
    return res;
}

/**
 * Keeps only the numToKeep entries of source with the largest values.
 *
 * @param source     map of k-grams to their weights
 * @param numToKeep  number of entries to retain; zero is allowed and
 *                   produces an empty map
 * @return a map holding the retained entries with their original values;
 *         all of source when numToKeep is at least source.size()
 *
 * Calls error() when numToKeep is negative.
 *
 * NOTE: relies on PriorityQueue::dequeue() handing back the entry with the
 * smallest priority first, so the earliest dequeued entries are the ones to
 * throw away.
 */
Map<string, double> topKGramsIn(const Map<string, double>& source, int numToKeep) {
    if (numToKeep < 0) {
        // Zero passes the check above, so "positive" would be misleading.
        error("numToKeep should be non-negative.");
    }

    PriorityQueue<string> byWeight;
    for (const string& gram : source) {
        byWeight.enqueue(gram, source[gram]);
    }

    // Phase 1: drop the lowest-weight entries that do not fit.
    int numToDiscard = source.size() - numToKeep;
    while (numToDiscard > 0 && byWeight.size() > 0) {
        byWeight.dequeue();
        --numToDiscard;
    }

    // Phase 2: everything still queued belongs in the result.
    Map<string, double> res;
    while (byWeight.size() > 0) {
        const string gram = byWeight.dequeue();
        res[gram] = source[gram];
    }
    return res;
}

double cosineSimilarityOf(const Map<string, double>& lhs, const Map<string, double>& rhs) {
Set<string> both;
for (auto key : lhs) {
if (rhs.containsKey(key)) {
both.add(key);
}
}
double res = 0.0;
for (auto key : both) {
res += lhs[key] * rhs[key];
}
return res;
}

/**
 * Picks the corpus whose profile is most similar to textProfile.
 *
 * @param textProfile  profile of the text being classified
 * @param corpora      candidate corpora, each with a name and a profile
 * @return the name of the corpus with the highest cosineSimilarityOf()
 *         score against textProfile; on ties the corpus visited first wins
 *
 * Calls error() when corpora is empty.
 */
string guessLanguageOf(const Map<string, double>& textProfile,
                       const Set<Corpus>& corpora) {
    if (corpora.size() == 0) {
        error("We need more corpus.");
    }

    string ans;
    double bestSimilarity = 0.0;
    // Track whether a candidate has been accepted yet so that the first
    // corpus is always taken, even when every similarity is zero or less.
    // Starting from a fixed 0.0 threshold would return an empty name then.
    bool haveCandidate = false;
    for (const auto& corpus : corpora) {
        const double similarity = cosineSimilarityOf(textProfile, corpus.profile);
        if (!haveCandidate || bestSimilarity < similarity) {
            ans = corpus.name;
            bestSimilarity = similarity;
            haveCandidate = true;
        }
    }
    return ans;
}

Rising Tides

实现Flood Fill算法

Qt里这个作业的可视化真的蛮有意思

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
/**
 * Determines which cells of terrain end up under water when water enters at
 * the given sources and rises to the given height.
 *
 * A source cell floods when height is at least its terrain value. Water then
 * spreads to the four orthogonal neighbours of any flooded cell, skipping
 * neighbours whose terrain value is greater than height.
 *
 * @param terrain  elevation of every cell
 * @param sources  cells where water enters the grid
 * @param height   water level
 * @return grid of the same dimensions as terrain; true marks a flooded cell
 */
Grid<bool> floodedRegionsIn(const Grid<double>& terrain,
                            const Vector<GridLocation>& sources,
                            double height) {
    const int rows = terrain.numRows();
    const int cols = terrain.numCols();

    Grid<bool> flooded(rows, cols);
    flooded.fill(false);

    // Seed one shared frontier with every source that is low enough.
    Queue<GridLocation> frontier;
    for (const GridLocation& source : sources) {
        if (height >= terrain[source.row][source.col]) {
            flooded[source.row][source.col] = true;
            frontier.enqueue(source);
        }
    }

    // Offsets for up, down, left, right.
    const int rowStep[] = {-1, 1, 0, 0};
    const int colStep[] = {0, 0, -1, 1};

    while (frontier.size() > 0) {
        const GridLocation here = frontier.dequeue();
        for (int dir = 0; dir < 4; ++dir) {
            const int nextRow = here.row + rowStep[dir];
            const int nextCol = here.col + colStep[dir];

            const bool inside = nextRow >= 0 && nextRow < rows &&
                                nextCol >= 0 && nextCol < cols;
            if (!inside) {
                continue;
            }
            // Already handled, or too high for the water to reach.
            if (flooded[nextRow][nextCol] || terrain[nextRow][nextCol] > height) {
                continue;
            }

            // Mark on enqueue so no cell enters the frontier via two neighbours.
            flooded[nextRow][nextCol] = true;
            frontier.enqueue(GridLocation{nextRow, nextCol});
        }
    }

    return flooded;
}

效果

输文本猜语言

海平面上升的湾区