Algo of Strings

Download as pdf or txt
Download as pdf or txt
You are on page 1of 85

CSE 408 Coursera Answers:

IP CSE 408.pdf

.
1. Algorithms On Strings:

Week 1:
Programming Assignment 1.

non_shared_substring.cpp

#include <iostream>
#include <string>

using namespace std;

string solve (string p, string q)


{
string result = p;

return result;
}

int main (void)


{
string p;
cin >> p;
string q;
cin >> q;

string ans = solve (p, q);

cout << ans << endl;

return 0;
}

2.
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <utility>

using std::cin;
using std::cout;
using std::endl;
using std::map;
using std::string;
using std::vector;
using std::pair;
using std::make_pair;

struct Node {
pair<int, int> range;
vector<Node> next;
};

// Build a suffix tree of the string text and return a


vector
// with all of the labels of its edges (the
corresponding
// substrings of the text) in any order.
vector<string> ComputeSuffixTreeEdges(const
string& text) {
vector<string> result;
// Implement this function yourself
Node trie;
for (int i = text.size() - 1; i >= 0; i--) {
bool found = false;
Node curr = trie;
int j = i;
bool cont_to_next_level = true;
while (cont_to_next_level) {
if (curr.next.empty()) {
break;
}
for (int k = 0; k < curr.next.size(); k++) {
if (text[j] == text[curr.next[k].range.first]) {
found = true;
int l = curr.next[k].range.first;
int r = curr.next[k].range.second;
while(text[j] == text[l]) {
if (l == r) {
cont_to_next_level = true;
curr = curr.next[k];
j++;
break;
}
j++;
l++;
cont_to_next_level = false;
}
if (!cont_to_next_level) {
curr = curr.next[k];
curr.range.second = l - 1;
Node new_node;
new_node.range = make_pair(j, text.size() -
1);
curr.next.push_back(new_node);
}
break;
}
}
}
if (!found) {
Node new_node;
new_node.range = make_pair(i, text.size() - 1);
trie.next.push_back(new_node);
}
}
return result;
}

int main() {
string text;
cin >> text;
vector<string> edges =
ComputeSuffixTreeEdges(text);
for (int i = 0; i < edges.size(); ++i) {
cout << edges[i] << endl;
}
return 0;
}

3.
#include <string>
#include <iostream>
#include <vector>
#include <map>
#include <utility>

using std::map;
using std::vector;
using std::string;
using std::pair;
typedef map<char, int> edges;
typedef vector<edges> trie;

trie build_trie(vector<string> & patterns) {


trie t;
// write your code here
for (int i = 0; i < patterns.size(); i++) {
int x = 0;
for (int j = 0; j < patterns[i].size(); j++) {
bool match_found = false;
if (x < t.size()) {
for (const auto & k : t[x]) {
if (k.first == patterns[i][j]) {
x = k.second;
match_found = true;
break;
}
}
if (!match_found) {
t[x].insert(pair<char, int> (patterns[i][j],
t.size()));
x = t.size();
}
} else {
map<char, int> m;
m.insert(pair<char, int> (patterns[i][j], t.size()
+ 1));
t.push_back(m);
x = t.size();
}
}
map<char, int> m;
m.insert(pair<char, int> ('$', -1));
t.push_back(m);
}
return t;
}
int main() {
size_t n;
std::cin >> n;
vector<string> patterns;
for (size_t i = 0; i < n; i++) {
string s;
std::cin >> s;
patterns.push_back(s);
}

trie t = build_trie(patterns);
for (size_t i = 0; i < t.size(); ++i) {
for (const auto & j : t[i]) {
if (j.first != '$') {
std::cout << i << "->" << j.second << ":" <<
j.first << "\n";
}
}
}

return 0;
}

4.
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <iostream>
#include <string>
#include <vector>

using namespace std;

int const Letters = 4;


int const NA = -1;
struct Node
{
int next [Letters];

Node ()
{
fill (next, next + Letters, NA);
}

bool isLeaf () const


{
return (next[0] == NA && next[1] == NA &&
next[2] == NA && next[3] == NA);
}
};

int letterToIndex (char letter)


{
switch (letter)
{
case 'A': return 0; break;
case 'C': return 1; break;
case 'G': return 2; break;
case 'T': return 3; break;
default: assert (false); return -1;
}
}

void build_trie (const vector <string>& patterns,


vector<Node> &t)
{
for (int i = 0; i < patterns.size(); i++)
{
int x = 0;
for (int j = 0; j < patterns[i].size(); j++)
{
int index = letterToIndex(patterns[i][j]);
if (x >= t.size())
{
t.resize(x + 1);
}
if (t[x].next[index] != -1)
{
x = t[x].next[index];
}
else
{
t[x].next[index] = t.size();
x = t[x].next[index];
t.resize(x + 1);
}
}
}
}

vector <int> solve (const string& text, int n, const


vector <string>& patterns)
{
vector <int> result;

// write your code here


vector<Node> t;
build_trie(patterns, t);

for (int i = 0; i < text.size(); i++)


{
int index = letterToIndex(text[i]);
int x = 0;
if (t[x].next[index] != -1)
{
bool found = true;
for (int j = i; !t[x].isLeaf() ; j++)
{
if (j >= text.size())
{
found = false;
break;
}
index = letterToIndex(text[j]);
if (t[x].next[index] != -1)
{
x = t[x].next[index];
}
else
{
found = false;
break;
}
}
if (found)
{
result.push_back(i);
}
}
}

return result;
}

int main (void)


{
string t;
cin >> t;

int n;
cin >> n;
vector <string> patterns (n);
for (int i = 0; i < n; i++)
{
cin >> patterns[i];
}

vector <int> ans;


ans = solve (t, n, patterns);

for (int i = 0; i < (int) ans.size (); i++)


{
cout << ans[i];
if (i + 1 < (int) ans.size ())
{
cout << " ";
}
else
{
cout << endl;
}
}

return 0;
}

5.

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <iostream>
#include <string>
#include <vector>

using namespace std;


int const Letters = 4;
int const NA = -1;

struct Node
{
int next [Letters];
bool patternEnd;

Node ()
{
fill (next, next + Letters, NA);
patternEnd = false;
}
};

int letterToIndex (char letter)


{
switch (letter)
{
case 'A': return 0; break;
case 'C': return 1; break;
case 'G': return 2; break;
case 'T': return 3; break;
default: assert (false); return -1;
}
}

void build_trie (const vector <string>& patterns,


vector<Node> &t)
{
for (int i = 0; i < patterns.size(); i++)
{
int x = 0;
for (int j = 0; j < patterns[i].size(); j++)
{
int index = letterToIndex(patterns[i][j]);
if (x >= t.size())
{
t.resize(x + 1);
}
if (t[x].next[index] != -1)
{
x = t[x].next[index];
}
else
{
t[x].next[index] = t.size();
x = t[x].next[index];
t.resize(x + 1);
}
}
t[x].patternEnd = true;
}
}

vector <int> solve (const string& text, int n, const


vector <string>& patterns)
{
vector <int> result;

// write your code here


vector<Node> t;
build_trie(patterns, t);

for (int i = 0; i < text.size(); i++)


{
int index = letterToIndex(text[i]);
int x = 0;
if (t[x].next[index] != -1)
{
bool found = true;
for (int j = i; !t[x].patternEnd ; j++)
{
if (j >= text.size())
{
found = false;
break;
}
index = letterToIndex(text[j]);
if (t[x].next[index] != -1)
{
x = t[x].next[index];
}
else
{
found = false;
break;
}
}
if (found)
{
result.push_back(i);
}
}
}

return result;
}

int main (void)


{
string t;
cin >> t;

int n;
cin >> n;
vector <string> patterns (n);
for (int i = 0; i < n; i++)
{
cin >> patterns[i];
}

vector <int> ans;


ans = solve (t, n, patterns);

for (int i = 0; i < (int) ans.size (); i++)


{
cout << ans[i];
if (i + 1 < (int) ans.size ())
{
cout << " ";
}
else
{
cout << endl;
}
}

return 0;
}

Week 2:
1. #include <algorithm>
#include <cstdio>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
using std::cin;
using std::istringstream;
using std::map;
using std::string;
using std::vector;
using std::cout;
using std::endl;

// Preprocess the Burrows-Wheeler Transform bwt


of some text
// and compute as a result:
// * starts - for each character C in bwt, starts[C] is
the first position
// of this character in the sorted array of
// all characters of the text.
// * occ_count_before - for each character C in bwt
and each position P in bwt,
// occ_count_before[C][P] is the number of
occurrences of character C in bwt
// from position 0 to position P inclusive.

// Note: Make occ_count_before[C][P] exclusive P.


// It will make the code cleaner and easier to
implement.

void PreprocessBWT(const string& bwt,


map<char, int>& starts,
map<char, vector<int> >&
occ_count_before) {
// Implement this function yourself
for (int i = 0; i < bwt.size(); i++) {
starts[bwt[i]]++;
occ_count_before[bwt[i]][i + 1]++;
for (auto& it : occ_count_before) {
if (i < bwt.size()) {
it.second[i + 1] += it.second[i];
}
}
}

int cf = 0;
for (auto& it : starts) {
if (it.second == 0) {
it.second = -1;
} else {
int temp = it.second;
it.second = cf;
cf += temp;
}
}
}

// Compute the number of occurrences of string


pattern in the text
// given only Burrows-Wheeler Transform bwt of
the text and additional
// information we get from the preprocessing stage
- starts and occ_count_before.
int CountOccurrences(const string& pattern,
const string& bwt,
const map<char, int>& starts,
const map<char, vector<int> >&
occ_count_before) {
// Implement this function yourself
string pattern_cp = pattern;
int top = 0;
int bottom = bwt.size() - 1;
while (top <= bottom) {
if (!pattern_cp.empty()) {
char symbol = pattern_cp.back();
pattern_cp.pop_back();
if (occ_count_before.find(symbol)-
>second[bottom + 1] >
occ_count_before.find(symbol)->second[top]) {
top = starts.find(symbol)->second +
occ_count_before.find(symbol)->second[top];
bottom = starts.find(symbol)->second +
occ_count_before.find(symbol)->second[bottom +
1] - 1;
} else {
return 0;
}
} else {
return bottom - top + 1;
}
}

return 0;
}
int main() {
string bwt;
cin >> bwt;
int pattern_count;
cin >> pattern_count;
// Start of each character in the sorted list of
characters of bwt,
// see the description in the comment about
function PreprocessBWT
map<char, int> starts = {{'$', 0}, {'A', 0}, {'C', 0},
{'G', 0}, {'T', 0}};
// Occurrence counts for each character and each
position in bwt,
// see the description in the comment about
function PreprocessBWT
map<char, vector<int> > occ_count_before = {{'$',
vector<int>(bwt.size() + 1)}, {'A',
vector<int>(bwt.size() + 1)}, {'C',
vector<int>(bwt.size() + 1)}, {'G',
vector<int>(bwt.size() + 1)}, {'T',
vector<int>(bwt.size() + 1)}};;
// Preprocess the BWT once to get starts and
occ_count_before.
// For each pattern, we will then use these
precomputed values and
// spend only O(|pattern|) to find all occurrences
of the pattern
// in the text instead of O(|pattern| + |text|).
PreprocessBWT(bwt, starts, occ_count_before);

for (int pi = 0; pi < pattern_count; ++pi) {


string pattern;
cin >> pattern;
int occ_count = CountOccurrences(pattern, bwt,
starts, occ_count_before);
printf("%d ", occ_count);
}
printf("\n");
return 0;
}

2. #include <algorithm>
#include <cstdio>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

using std::cin;
using std::istringstream;
using std::map;
using std::string;
using std::vector;
using std::cout;
using std::endl;
// Preprocess the Burrows-Wheeler Transform bwt
of some text
// and compute as a result:
// * starts - for each character C in bwt, starts[C] is
the first position
// of this character in the sorted array of
// all characters of the text.
// * occ_count_before - for each character C in bwt
and each position P in bwt,
// occ_count_before[C][P] is the number of
occurrences of character C in bwt
// from position 0 to position P inclusive.

// Note: Make occ_count_before[C][P] exclusive P.


// It will make the code cleaner and easier to
implement.

void PreprocessBWT(const string& bwt,


map<char, int>& starts,
map<char, vector<int> >&
occ_count_before) {
// Implement this function yourself
for (int i = 0; i < bwt.size(); i++) {
starts[bwt[i]]++;
occ_count_before[bwt[i]][i + 1]++;
for (auto& it : occ_count_before) {
if (i < bwt.size()) {
it.second[i + 1] += it.second[i];
}
}
}

int cf = 0;
for (auto& it : starts) {
if (it.second == 0) {
it.second = -1;
} else {
int temp = it.second;
it.second = cf;
cf += temp;
}
}
}

// Compute the number of occurrences of string


pattern in the text
// given only Burrows-Wheeler Transform bwt of
the text and additional
// information we get from the preprocessing stage
- starts and occ_count_before.
int CountOccurrences(const string& pattern,
const string& bwt,
const map<char, int>& starts,
const map<char, vector<int> >&
occ_count_before) {
// Implement this function yourself
string pattern_cp = pattern;
int top = 0;
int bottom = bwt.size() - 1;
while (top <= bottom) {
if (!pattern_cp.empty()) {
char symbol = pattern_cp.back();
pattern_cp.pop_back();
if (occ_count_before.find(symbol)-
>second[bottom + 1] >
occ_count_before.find(symbol)->second[top]) {
top = starts.find(symbol)->second +
occ_count_before.find(symbol)->second[top];
bottom = starts.find(symbol)->second +
occ_count_before.find(symbol)->second[bottom +
1] - 1;
} else {
return 0;
}
} else {
return bottom - top + 1;
}
}

return 0;
}

int main() {
string bwt;
cin >> bwt;
int pattern_count;
cin >> pattern_count;
// Start of each character in the sorted list of
characters of bwt,
// see the description in the comment about
function PreprocessBWT
map<char, int> starts = {{'$', 0}, {'A', 0}, {'C', 0},
{'G', 0}, {'T', 0}};
// Occurrence counts for each character and each
position in bwt,
// see the description in the comment about
function PreprocessBWT
map<char, vector<int> > occ_count_before = {{'$',
vector<int>(bwt.size() + 1)}, {'A',
vector<int>(bwt.size() + 1)}, {'C',
vector<int>(bwt.size() + 1)}, {'G',
vector<int>(bwt.size() + 1)}, {'T',
vector<int>(bwt.size() + 1)}};;
// Preprocess the BWT once to get starts and
occ_count_before.
// For each pattern, we will then use these
precomputed values and
// spend only O(|pattern|) to find all occurrences
of the pattern
// in the text instead of O(|pattern| + |text|).
PreprocessBWT(bwt, starts, occ_count_before);
for (int pi = 0; pi < pattern_count; ++pi) {
string pattern;
cin >> pattern;
int occ_count = CountOccurrences(pattern, bwt,
starts, occ_count_before);
printf("%d ", occ_count);
}
printf("\n");
return 0;
}

3. #include <algorithm>
#include <cstdio>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
using std::cin;
using std::istringstream;
using std::map;
using std::string;
using std::vector;
using std::cout;
using std::endl;

// Preprocess the Burrows-Wheeler Transform bwt


of some text
// and compute as a result:
// * starts - for each character C in bwt, starts[C] is
the first position
// of this character in the sorted array of
// all characters of the text.
// * occ_count_before - for each character C in bwt
and each position P in bwt,
// occ_count_before[C][P] is the number of
occurrences of character C in bwt
// from position 0 to position P inclusive.

// Note: Make occ_count_before[C][P] exclusive P.


// It will make the code cleaner and easier to
implement.

void PreprocessBWT(const string& bwt,


map<char, int>& starts,
map<char, vector<int> >&
occ_count_before) {
// Implement this function yourself
for (int i = 0; i < bwt.size(); i++) {
starts[bwt[i]]++;
occ_count_before[bwt[i]][i + 1]++;
for (auto& it : occ_count_before) {
if (i < bwt.size()) {
it.second[i + 1] += it.second[i];
}
}
}

int cf = 0;
for (auto& it : starts) {
if (it.second == 0) {
it.second = -1;
} else {
int temp = it.second;
it.second = cf;
cf += temp;
}
}
}

// Compute the number of occurrences of string


pattern in the text
// given only Burrows-Wheeler Transform bwt of
the text and additional
// information we get from the preprocessing stage
- starts and occ_count_before.
int CountOccurrences(const string& pattern,
const string& bwt,
const map<char, int>& starts,
const map<char, vector<int> >&
occ_count_before) {
// Implement this function yourself
string pattern_cp = pattern;
int top = 0;
int bottom = bwt.size() - 1;
while (top <= bottom) {
if (!pattern_cp.empty()) {
char symbol = pattern_cp.back();
pattern_cp.pop_back();
if (occ_count_before.find(symbol)-
>second[bottom + 1] >
occ_count_before.find(symbol)->second[top]) {
top = starts.find(symbol)->second +
occ_count_before.find(symbol)->second[top];
bottom = starts.find(symbol)->second +
occ_count_before.find(symbol)->second[bottom +
1] - 1;
} else {
return 0;
}
} else {
return bottom - top + 1;
}
}

return 0;
}
int main() {
string bwt;
cin >> bwt;
int pattern_count;
cin >> pattern_count;
// Start of each character in the sorted list of
characters of bwt,
// see the description in the comment about
function PreprocessBWT
map<char, int> starts = {{'$', 0}, {'A', 0}, {'C', 0},
{'G', 0}, {'T', 0}};
// Occurrence counts for each character and each
position in bwt,
// see the description in the comment about
function PreprocessBWT
map<char, vector<int> > occ_count_before = {{'$',
vector<int>(bwt.size() + 1)}, {'A',
vector<int>(bwt.size() + 1)}, {'C',
vector<int>(bwt.size() + 1)}, {'G',
vector<int>(bwt.size() + 1)}, {'T',
vector<int>(bwt.size() + 1)}};;
// Preprocess the BWT once to get starts and
occ_count_before.
// For each pattern, we will then use these
precomputed values and
// spend only O(|pattern|) to find all occurrences
of the pattern
// in the text instead of O(|pattern| + |text|).
PreprocessBWT(bwt, starts, occ_count_before);

for (int pi = 0; pi < pattern_count; ++pi) {


string pattern;
cin >> pattern;
int occ_count = CountOccurrences(pattern, bwt,
starts, occ_count_before);
printf("%d ", occ_count);
}
printf("\n");
return 0;
}

4. #include <algorithm>
#include <cstdio>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

using std::cin;
using std::istringstream;
using std::map;
using std::string;
using std::vector;
using std::cout;
using std::endl;
// Preprocess the Burrows-Wheeler Transform bwt
of some text
// and compute as a result:
// * starts - for each character C in bwt, starts[C] is
the first position
// of this character in the sorted array of
// all characters of the text.
// * occ_count_before - for each character C in bwt
and each position P in bwt,
// occ_count_before[C][P] is the number of
occurrences of character C in bwt
// from position 0 to position P inclusive.

// Note: Make occ_count_before[C][P] exclusive P.


// It will make the code cleaner and easier to
implement.

void PreprocessBWT(const string& bwt,


map<char, int>& starts,
map<char, vector<int> >&
occ_count_before) {
// Implement this function yourself
for (int i = 0; i < bwt.size(); i++) {
starts[bwt[i]]++;
occ_count_before[bwt[i]][i + 1]++;
for (auto& it : occ_count_before) {
if (i < bwt.size()) {
it.second[i + 1] += it.second[i];
}
}
}

int cf = 0;
for (auto& it : starts) {
if (it.second == 0) {
it.second = -1;
} else {
int temp = it.second;
it.second = cf;
cf += temp;
}
}
}

// Compute the number of occurrences of string


pattern in the text
// given only Burrows-Wheeler Transform bwt of
the text and additional
// information we get from the preprocessing stage
- starts and occ_count_before.
int CountOccurrences(const string& pattern,
const string& bwt,
const map<char, int>& starts,
const map<char, vector<int> >&
occ_count_before) {
// Implement this function yourself
string pattern_cp = pattern;
int top = 0;
int bottom = bwt.size() - 1;
while (top <= bottom) {
if (!pattern_cp.empty()) {
char symbol = pattern_cp.back();
pattern_cp.pop_back();
if (occ_count_before.find(symbol)-
>second[bottom + 1] >
occ_count_before.find(symbol)->second[top]) {
top = starts.find(symbol)->second +
occ_count_before.find(symbol)->second[top];
bottom = starts.find(symbol)->second +
occ_count_before.find(symbol)->second[bottom +
1] - 1;
} else {
return 0;
}
} else {
return bottom - top + 1;
}
}

return 0;
}

int main() {
string bwt;
cin >> bwt;
int pattern_count;
cin >> pattern_count;
// Start of each character in the sorted list of
characters of bwt,
// see the description in the comment about
function PreprocessBWT
map<char, int> starts = {{'$', 0}, {'A', 0}, {'C', 0},
{'G', 0}, {'T', 0}};
// Occurrence counts for each character and each
position in bwt,
// see the description in the comment about
function PreprocessBWT
map<char, vector<int> > occ_count_before = {{'$',
vector<int>(bwt.size() + 1)}, {'A',
vector<int>(bwt.size() + 1)}, {'C',
vector<int>(bwt.size() + 1)}, {'G',
vector<int>(bwt.size() + 1)}, {'T',
vector<int>(bwt.size() + 1)}};;
// Preprocess the BWT once to get starts and
occ_count_before.
// For each pattern, we will then use these
precomputed values and
// spend only O(|pattern|) to find all occurrences
of the pattern
// in the text instead of O(|pattern| + |text|).
PreprocessBWT(bwt, starts, occ_count_before);
for (int pi = 0; pi < pattern_count; ++pi) {
string pattern;
cin >> pattern;
int occ_count = CountOccurrences(pattern, bwt,
starts, occ_count_before);
printf("%d ", occ_count);
}
printf("\n");
return 0;
}
Week 3:
Week 4:

1. #include <cstdio>
#include <iostream>
#include <string>
#include <vector>
using std::cin;
using std::string;
using std::vector;

void compute_prefix(string& p_t, vector<int>& s) {


int border = 0;
for (int i = 1; i < p_t.size(); i++) {
while (border > 0 && p_t[i] != p_t[border]) {
border = s[border - 1];
}
if (p_t[i] == p_t[border]) {
border++;
s[i] = border;
}
if (border == 0) {
s[i] = 0;
}
}
}

// Find all occurrences of the pattern in the text and


return a
// vector with all positions in the text (starting from
0) where
// the pattern starts in the text.
vector<int> find_pattern(const string& pattern,
const string& text) {
vector<int> result;
// Implement this function yourself
string p_t = pattern + '$' + text;
vector<int> s(p_t.size());
compute_prefix(p_t, s);
for (int i = pattern.size() + 1; i < p_t.size(); i++) {
if (s[i] == pattern.size()) {
result.push_back(i - 2 * pattern.size());
}
}
return result;
}

int main() {
string pattern, text;
cin >> pattern;
cin >> text;
vector<int> result = find_pattern(pattern, text);
for (int i = 0; i < result.size(); ++i) {
printf("%d ", result[i]);
}
printf("\n");
return 0;
}

2. #include <cstdio>
#include <iostream>
#include <string>
#include <vector>

using std::cin;
using std::string;
using std::vector;

void compute_prefix(string& p_t, vector<int>& s) {


int border = 0;
for (int i = 1; i < p_t.size(); i++) {
while (border > 0 && p_t[i] != p_t[border]) {
border = s[border - 1];
}
if (p_t[i] == p_t[border]) {
border++;
s[i] = border;
}
if (border == 0) {
s[i] = 0;
}
}
}

// Find all occurrences of the pattern in the text and


return a
// vector with all positions in the text (starting from
0) where
// the pattern starts in the text.
vector<int> find_pattern(const string& pattern,
const string& text) {
vector<int> result;
// Implement this function yourself
string p_t = pattern + '$' + text;
vector<int> s(p_t.size());
compute_prefix(p_t, s);
for (int i = pattern.size() + 1; i < p_t.size(); i++) {
if (s[i] == pattern.size()) {
result.push_back(i - 2 * pattern.size());
}
}
return result;
}

int main() {
string pattern, text;
cin >> pattern;
cin >> text;
vector<int> result = find_pattern(pattern, text);
for (int i = 0; i < result.size(); ++i) {
printf("%d ", result[i]);
}
printf("\n");
return 0;
}
3. #include <algorithm>
#include <iostream>
#include <string>
#include <vector>
#include <utility>

using std::cin;
using std::cout;
using std::endl;
using std::make_pair;
using std::pair;
using std::string;
using std::vector;

void print_int_v(vector<int>& v) {
for (int i = 0; i < v.size(); i++) {
cout << v[i] << " ";
}
cout << endl;
}

int char_to_idx(char x) {
switch (x) {
case '$':
return 0;
case 'A':
return 1;
case 'C':
return 2;
case 'G':
return 3;
case 'T':
return 4;
}
}
void counting_sort_char(const string& text,
vector<int>& order) {
vector<int> c_f(5, 0);
for (int i = 0; i < text.size(); i++) {
c_f[char_to_idx(text[i])]++;
}
for (int i = 1; i < c_f.size(); i++) {
c_f[i] += c_f[i - 1];
}
for (int i = text.size() - 1; i >= 0; i--) {
order[--c_f[char_to_idx(text[i])]] = i;
}
}

void compute_class(const string& text,


vector<int>& order, vector<int>& sort_class) {
int count = 0;
sort_class[order[0]] = count;
for (int i = 1; i < order.size(); i++) {
if (text[order[i]] == text[order[i - 1]])
sort_class[order[i]] = count;
else
sort_class[order[i]] = ++count;
}
}

vector<int> sort_double(const string& text, int l,


vector<int>& order, vector<int>& sort_class) {
vector<int> new_order(text.size());
vector<int> c_f(text.size(), 0);

for (int i = 0; i < order.size(); i++) {


c_f[sort_class[i]]++;
}
for (int i = 1; i < c_f.size(); i++) {
c_f[i] += c_f[i - 1];
}
for (int i = order.size() - 1; i >= 0; i--) {
int start = (order[i] - l + order.size()) %
order.size();
new_order[--c_f[sort_class[start]]] = start;
}
order.clear();
return new_order;
}

vector<int> update_class(vector<int>& order,


vector<int>& sort_class, int l) {
vector<int> new_sort_class(order.size());
int count = 0;
new_sort_class[order[0]] = count;
for (int i = 1; i < order.size(); i++) {
int cur = order[i];
int cur_mid = (order[i] + l) % order.size();
int prev = order[i - 1];
int prev_mid = (order[i - 1] + l) % order.size();
if (sort_class[cur]!= sort_class[prev] ||
sort_class[cur_mid] != sort_class[prev_mid]) {
new_sort_class[order[i]] = ++count;
} else {
new_sort_class[order[i]] = count;
}
}
sort_class.clear();
return new_sort_class;
}

// Build suffix array of the string text and


// return a vector result of the same length as the
text
// such that the value result[i] is the index (0-based)
// in text where the i-th lexicographically smallest
// suffix of text starts.
vector<int> BuildSuffixArray(const string& text) {
vector<int> result;
// Implement this function yourself
vector<int> order(text.size());
counting_sort_char(text, order);

vector<int> sort_class(text.size());
compute_class(text, order, sort_class);

for (int l = 1; l <= text.size(); l *= 2) {


order = sort_double(text, l, order, sort_class);
sort_class = update_class(order, sort_class, l);
}

return order;
}

int main() {
string text;
cin >> text;
vector<int> suffix_array = BuildSuffixArray(text);
for (int i = 0; i < suffix_array.size(); ++i) {
cout << suffix_array[i] << ' ';
}
cout << endl;
return 0;
}

4. #include <algorithm>
#include <cstdio>
#include <map>
#include <string>
#include <utility>
#include <vector>

using std::make_pair;
using std::map;
using std::pair;
using std::string;
using std::vector;

// Data structure to store edges of a suffix tree.


struct Edge {
// The ending node of this edge.
int node;
// Starting position of the substring of the text
// corresponding to the label of this edge.
int start;
// Position right after the end of the substring of
the text
// corresponding to the label of this edge.
int end;

Edge(int node_, int start_, int end_) : node(node_),


start(start_), end(end_) {}
Edge(const Edge& e) : node(e.node), start(e.start),
end(e.end) {}
};

// Build suffix tree of the string text given its suffix


array suffix_array
// and LCP array lcp_array. Return the tree as a
mapping from a node ID
// to the vector of all outgoing edges of the
corresponding node. The edges in the
// vector must be sorted in the ascending order by
the first character of the edge label.
// Root must have node ID = 0, and all other node
IDs must be different
// nonnegative integers.
//
// For example, if text = "ACACAA$", an edge with
label "$" from root to a node with ID 1
// must be represented by Edge(1, 6, 7). This edge
must be present in the vector tree[0]
// (corresponding to the root node), and it should
be the first edge in the vector
// (because it has the smallest first character of all
edges outgoing from the root).
map<int, vector<Edge> >
SuffixTreeFromSuffixArray(
const vector<int>& suffix_array,
const vector<int>& lcp_array,
const string& text) {
map<int, vector<Edge> > tree;
// Implement this function yourself
return tree;
}

int main() {
char buffer[200001];
scanf("%s", buffer);
string text = buffer;
vector<int> suffix_array(text.length());
for (int i = 0; i < text.length(); ++i) {
scanf("%d", &suffix_array[i]);
}
vector<int> lcp_array(text.length() - 1);
for (int i = 0; i + 1 < text.length(); ++i) {
scanf("%d", &lcp_array[i]);
}
// Build the suffix tree and get a mapping from
// suffix tree node ID to the list of outgoing Edges.
map<int, vector<Edge> > tree =
SuffixTreeFromSuffixArray(suffix_array, lcp_array,
text);
printf("%s\n", buffer);
// Output the edges of the suffix tree in the
required order.
// Note that we use here the contract that the root
of the tree
// will have node ID = 0 and that each vector of
outgoing edges
// will be sorted by the first character of the
corresponding edge label.
//
// The following code avoids recursion to avoid
stack overflow issues.
// It uses a stack to convert recursive function to a
while loop.
// The stack stores pairs (node, edge_index).
// This code is an equivalent of
//
// OutputEdges(tree, 0);
//
// for the following _recursive_ function
OutputEdges:
//
// void OutputEdges(map<int, vector<Edge> >
tree, int node_id) {
// const vector<Edge>& edges = tree[node_id];
// for (int edge_index = 0; edge_index <
edges.size(); ++edge_index) {
// printf("%d %d\n", edges[edge_index].start,
edges[edge_index].end);
// OutputEdges(tree, edges[edge_index].node);
// }
// }
//
vector<pair<int, int> > stack(1, make_pair(0, 0));
while (!stack.empty()) {
pair<int, int> p = stack.back();
stack.pop_back();
int node = p.first;
int edge_index = p.second;
if (!tree.count(node)) {
continue;
}
const vector<Edge>& edges = tree[node];
if (edge_index + 1 < edges.size()) {
stack.push_back(make_pair(node, edge_index +
1));
}
printf("%d %d\n", edges[edge_index].start,
edges[edge_index].end);

stack.push_back(make_pair(edges[edge_index].node
, 0));
}
return 0;
}

You might also like