Algo of Strings
Algo of Strings
Algo of Strings
IP CSE 408.pdf
.
1. Algorithms On Strings:
Week 1:
Programming Assignment 1.
non_shared_substring.cpp
#include <iostream>
#include <string>
return result;
}
return 0;
}
2.
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <utility>
using std::cin;
using std::cout;
using std::endl;
using std::map;
using std::string;
using std::vector;
using std::pair;
using std::make_pair;
struct Node {
pair<int, int> range;
vector<Node> next;
};
int main() {
string text;
cin >> text;
vector<string> edges =
ComputeSuffixTreeEdges(text);
for (int i = 0; i < edges.size(); ++i) {
cout << edges[i] << endl;
}
return 0;
}
3.
#include <string>
#include <iostream>
#include <vector>
#include <map>
#include <utility>
using std::map;
using std::vector;
using std::string;
using std::pair;
typedef map<char, int> edges;
typedef vector<edges> trie;
trie t = build_trie(patterns);
for (size_t i = 0; i < t.size(); ++i) {
for (const auto & j : t[i]) {
if (j.first != '$') {
std::cout << i << "->" << j.second << ":" <<
j.first << "\n";
}
}
}
return 0;
}
4.
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <iostream>
#include <string>
#include <vector>
Node ()
{
fill (next, next + Letters, NA);
}
return result;
}
int n;
cin >> n;
vector <string> patterns (n);
for (int i = 0; i < n; i++)
{
cin >> patterns[i];
}
return 0;
}
5.
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <iostream>
#include <string>
#include <vector>
struct Node
{
int next [Letters];
bool patternEnd;
Node ()
{
fill (next, next + Letters, NA);
patternEnd = false;
}
};
return result;
}
int n;
cin >> n;
vector <string> patterns (n);
for (int i = 0; i < n; i++)
{
cin >> patterns[i];
}
return 0;
}
Week 2:
1. #include <algorithm>
#include <cstdio>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
using std::cin;
using std::istringstream;
using std::map;
using std::string;
using std::vector;
using std::cout;
using std::endl;
int cf = 0;
for (auto& it : starts) {
if (it.second == 0) {
it.second = -1;
} else {
int temp = it.second;
it.second = cf;
cf += temp;
}
}
}
return 0;
}
int main() {
string bwt;
cin >> bwt;
int pattern_count;
cin >> pattern_count;
// Start of each character in the sorted list of
characters of bwt,
// see the description in the comment about
function PreprocessBWT
map<char, int> starts = {{'$', 0}, {'A', 0}, {'C', 0},
{'G', 0}, {'T', 0}};
// Occurrence counts for each character and each
position in bwt,
// see the description in the comment about
function PreprocessBWT
map<char, vector<int> > occ_count_before = {{'$',
vector<int>(bwt.size() + 1)}, {'A',
vector<int>(bwt.size() + 1)}, {'C',
vector<int>(bwt.size() + 1)}, {'G',
vector<int>(bwt.size() + 1)}, {'T',
vector<int>(bwt.size() + 1)}};;
// Preprocess the BWT once to get starts and
occ_count_before.
// For each pattern, we will then use these
precomputed values and
// spend only O(|pattern|) to find all occurrences
of the pattern
// in the text instead of O(|pattern| + |text|).
PreprocessBWT(bwt, starts, occ_count_before);
2. #include <algorithm>
#include <cstdio>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
using std::cin;
using std::istringstream;
using std::map;
using std::string;
using std::vector;
using std::cout;
using std::endl;
// Preprocess the Burrows-Wheeler Transform bwt
of some text
// and compute as a result:
// * starts - for each character C in bwt, starts[C] is
the first position
// of this character in the sorted array of
// all characters of the text.
// * occ_count_before - for each character C in bwt
and each position P in bwt,
// occ_count_before[C][P] is the number of
occurrences of character C in bwt
// from position 0 to position P inclusive.
int cf = 0;
for (auto& it : starts) {
if (it.second == 0) {
it.second = -1;
} else {
int temp = it.second;
it.second = cf;
cf += temp;
}
}
}
return 0;
}
int main() {
string bwt;
cin >> bwt;
int pattern_count;
cin >> pattern_count;
// Start of each character in the sorted list of
characters of bwt,
// see the description in the comment about
function PreprocessBWT
map<char, int> starts = {{'$', 0}, {'A', 0}, {'C', 0},
{'G', 0}, {'T', 0}};
// Occurrence counts for each character and each
position in bwt,
// see the description in the comment about
function PreprocessBWT
map<char, vector<int> > occ_count_before = {{'$',
vector<int>(bwt.size() + 1)}, {'A',
vector<int>(bwt.size() + 1)}, {'C',
vector<int>(bwt.size() + 1)}, {'G',
vector<int>(bwt.size() + 1)}, {'T',
vector<int>(bwt.size() + 1)}};;
// Preprocess the BWT once to get starts and
occ_count_before.
// For each pattern, we will then use these
precomputed values and
// spend only O(|pattern|) to find all occurrences
of the pattern
// in the text instead of O(|pattern| + |text|).
PreprocessBWT(bwt, starts, occ_count_before);
for (int pi = 0; pi < pattern_count; ++pi) {
string pattern;
cin >> pattern;
int occ_count = CountOccurrences(pattern, bwt,
starts, occ_count_before);
printf("%d ", occ_count);
}
printf("\n");
return 0;
}
3. #include <algorithm>
#include <cstdio>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
using std::cin;
using std::istringstream;
using std::map;
using std::string;
using std::vector;
using std::cout;
using std::endl;
int cf = 0;
for (auto& it : starts) {
if (it.second == 0) {
it.second = -1;
} else {
int temp = it.second;
it.second = cf;
cf += temp;
}
}
}
return 0;
}
int main() {
string bwt;
cin >> bwt;
int pattern_count;
cin >> pattern_count;
// Start of each character in the sorted list of
characters of bwt,
// see the description in the comment about
function PreprocessBWT
map<char, int> starts = {{'$', 0}, {'A', 0}, {'C', 0},
{'G', 0}, {'T', 0}};
// Occurrence counts for each character and each
position in bwt,
// see the description in the comment about
function PreprocessBWT
map<char, vector<int> > occ_count_before = {{'$',
vector<int>(bwt.size() + 1)}, {'A',
vector<int>(bwt.size() + 1)}, {'C',
vector<int>(bwt.size() + 1)}, {'G',
vector<int>(bwt.size() + 1)}, {'T',
vector<int>(bwt.size() + 1)}};;
// Preprocess the BWT once to get starts and
occ_count_before.
// For each pattern, we will then use these
precomputed values and
// spend only O(|pattern|) to find all occurrences
of the pattern
// in the text instead of O(|pattern| + |text|).
PreprocessBWT(bwt, starts, occ_count_before);
4. #include <algorithm>
#include <cstdio>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
using std::cin;
using std::istringstream;
using std::map;
using std::string;
using std::vector;
using std::cout;
using std::endl;
// Preprocess the Burrows-Wheeler Transform bwt
of some text
// and compute as a result:
// * starts - for each character C in bwt, starts[C] is
the first position
// of this character in the sorted array of
// all characters of the text.
// * occ_count_before - for each character C in bwt
and each position P in bwt,
// occ_count_before[C][P] is the number of
occurrences of character C in bwt
// from position 0 to position P inclusive.
int cf = 0;
for (auto& it : starts) {
if (it.second == 0) {
it.second = -1;
} else {
int temp = it.second;
it.second = cf;
cf += temp;
}
}
}
return 0;
}
int main() {
string bwt;
cin >> bwt;
int pattern_count;
cin >> pattern_count;
// Start of each character in the sorted list of
characters of bwt,
// see the description in the comment about
function PreprocessBWT
map<char, int> starts = {{'$', 0}, {'A', 0}, {'C', 0},
{'G', 0}, {'T', 0}};
// Occurrence counts for each character and each
position in bwt,
// see the description in the comment about
function PreprocessBWT
map<char, vector<int> > occ_count_before = {{'$',
vector<int>(bwt.size() + 1)}, {'A',
vector<int>(bwt.size() + 1)}, {'C',
vector<int>(bwt.size() + 1)}, {'G',
vector<int>(bwt.size() + 1)}, {'T',
vector<int>(bwt.size() + 1)}};;
// Preprocess the BWT once to get starts and
occ_count_before.
// For each pattern, we will then use these
precomputed values and
// spend only O(|pattern|) to find all occurrences
of the pattern
// in the text instead of O(|pattern| + |text|).
PreprocessBWT(bwt, starts, occ_count_before);
for (int pi = 0; pi < pattern_count; ++pi) {
string pattern;
cin >> pattern;
int occ_count = CountOccurrences(pattern, bwt,
starts, occ_count_before);
printf("%d ", occ_count);
}
printf("\n");
return 0;
}
Week 3:
Week 4:
1. #include <cstdio>
#include <iostream>
#include <string>
#include <vector>
using std::cin;
using std::string;
using std::vector;
int main() {
string pattern, text;
cin >> pattern;
cin >> text;
vector<int> result = find_pattern(pattern, text);
for (int i = 0; i < result.size(); ++i) {
printf("%d ", result[i]);
}
printf("\n");
return 0;
}
2. #include <cstdio>
#include <iostream>
#include <string>
#include <vector>
using std::cin;
using std::string;
using std::vector;
int main() {
string pattern, text;
cin >> pattern;
cin >> text;
vector<int> result = find_pattern(pattern, text);
for (int i = 0; i < result.size(); ++i) {
printf("%d ", result[i]);
}
printf("\n");
return 0;
}
3. #include <algorithm>
#include <iostream>
#include <string>
#include <vector>
#include <utility>
using std::cin;
using std::cout;
using std::endl;
using std::make_pair;
using std::pair;
using std::string;
using std::vector;
void print_int_v(vector<int>& v) {
for (int i = 0; i < v.size(); i++) {
cout << v[i] << " ";
}
cout << endl;
}
int char_to_idx(char x) {
switch (x) {
case '$':
return 0;
case 'A':
return 1;
case 'C':
return 2;
case 'G':
return 3;
case 'T':
return 4;
}
}
void counting_sort_char(const string& text,
vector<int>& order) {
vector<int> c_f(5, 0);
for (int i = 0; i < text.size(); i++) {
c_f[char_to_idx(text[i])]++;
}
for (int i = 1; i < c_f.size(); i++) {
c_f[i] += c_f[i - 1];
}
for (int i = text.size() - 1; i >= 0; i--) {
order[--c_f[char_to_idx(text[i])]] = i;
}
}
vector<int> sort_class(text.size());
compute_class(text, order, sort_class);
return order;
}
int main() {
string text;
cin >> text;
vector<int> suffix_array = BuildSuffixArray(text);
for (int i = 0; i < suffix_array.size(); ++i) {
cout << suffix_array[i] << ' ';
}
cout << endl;
return 0;
}
4. #include <algorithm>
#include <cstdio>
#include <map>
#include <string>
#include <utility>
#include <vector>
using std::make_pair;
using std::map;
using std::pair;
using std::string;
using std::vector;
int main() {
char buffer[200001];
scanf("%s", buffer);
string text = buffer;
vector<int> suffix_array(text.length());
for (int i = 0; i < text.length(); ++i) {
scanf("%d", &suffix_array[i]);
}
vector<int> lcp_array(text.length() - 1);
for (int i = 0; i + 1 < text.length(); ++i) {
scanf("%d", &lcp_array[i]);
}
// Build the suffix tree and get a mapping from
// suffix tree node ID to the list of outgoing Edges.
map<int, vector<Edge> > tree =
SuffixTreeFromSuffixArray(suffix_array, lcp_array,
text);
printf("%s\n", buffer);
// Output the edges of the suffix tree in the
required order.
// Note that we use here the contract that the root
of the tree
// will have node ID = 0 and that each vector of
outgoing edges
// will be sorted by the first character of the
corresponding edge label.
//
// The following code avoids recursion to avoid
stack overflow issues.
// It uses a stack to convert recursive function to a
while loop.
// The stack stores pairs (node, edge_index).
// This code is an equivalent of
//
// OutputEdges(tree, 0);
//
// for the following _recursive_ function
OutputEdges:
//
// void OutputEdges(map<int, vector<Edge> >
tree, int node_id) {
// const vector<Edge>& edges = tree[node_id];
// for (int edge_index = 0; edge_index <
edges.size(); ++edge_index) {
// printf("%d %d\n", edges[edge_index].start,
edges[edge_index].end);
// OutputEdges(tree, edges[edge_index].node);
// }
// }
//
vector<pair<int, int> > stack(1, make_pair(0, 0));
while (!stack.empty()) {
pair<int, int> p = stack.back();
stack.pop_back();
int node = p.first;
int edge_index = p.second;
if (!tree.count(node)) {
continue;
}
const vector<Edge>& edges = tree[node];
if (edge_index + 1 < edges.size()) {
stack.push_back(make_pair(node, edge_index +
1));
}
printf("%d %d\n", edges[edge_index].start,
edges[edge_index].end);
stack.push_back(make_pair(edges[edge_index].node
, 0));
}
return 0;
}