Compute all distinct permutations of "CTCCT" and join them with | into a single regex:
CCCTT|CCTCT|CCTTC|CTCCT|CTCTC|CTTCC|TCCCT|TCCTC|TCTCC|TTCCC
This pattern can be optimized:
C(?:C(?:T(?:CT|TC)|CTT)|T(?:C(?:CT|TC)|TCC))|T(?:C(?:C(?:CT|TC)|TCC)|TCCC)
// Alternation covering every arrangement of three C's and two T's, with
// common prefixes factored out. Written as a plain literal: wrapping a regex
// literal in `new RegExp(...)` only creates a redundant second RegExp object.
var regex = /C(?:C(?:T(?:CT|TC)|CTT)|T(?:C(?:CT|TC)|TCC))|T(?:C(?:C(?:CT|TC)|TCC)|TCCC)/g;
var string = "TGATGCCGTCCCCTCAACTTGAGTGCTCCTAATGCGTTGC";
// exec() returns the first match array (with its `index`), or null if the
// string contains no such segment. Because of the `g` flag it also advances
// `regex.lastIndex` past the match.
console.log(regex.exec(string));
This pattern doesn't find overlapping matches, e.g. it finds only one match in CCCTTCCC (CCCTT at index 0), even though CCTTC, CTTCC and TTCCC occur there as well.
To find overlapping matches, use lookahead:
C(?=C(?=T(?=CT|TC)|CTT)|T(?=C(?=CT|TC)|TCC))|T(?=C(?=C(?=CT|TC)|TCC)|TCCC)
// Same alternation as the non-overlapping pattern, but only the first
// character is consumed; the remaining four are checked with (nested)
// lookaheads. Each match is therefore one character long, so the global
// search resumes at the very next index and overlapping segments are found.
var regex = /C(?=C(?=T(?=CT|TC)|CTT)|T(?=C(?=CT|TC)|TCC))|T(?=C(?=C(?=CT|TC)|TCC)|TCCC)/g;
var string = "CCCTTCCC";
// Declared explicitly: assigning to an undeclared `match` would create an
// implicit global and throws a ReferenceError in strict mode / ES modules.
let match;
while ((match = regex.exec(string)) !== null) {
  // The match itself is a single character, so slice out the full
  // 5-character segment starting at the match position.
  console.log(match.index, string.substring(match.index, match.index + 5));
}
A regex can only cope with a fairly limited number of permutations, because every permutation has to be spelled out in the pattern. If you want to match segments of possibly arbitrary size, use a non-regex solution:
/**
 * Locates the first run of five consecutive characters in `str` that is made
 * up of exactly three 'C' and two 'T', in any order.
 *
 * A 5-character window slides over the input while running counts of 'C' and
 * 'T' inside the window are maintained.
 *
 * @param {string} str - Text to scan.
 * @returns {number} Start index of the first matching window, or -1 if none.
 */
function c3t2_optimized(str) {
  let cCount = 0;
  let tCount = 0;

  for (let end = 0; end < str.length; ++end) {
    // Index at which the 5-character window ending at `end` begins.
    const start = end - 4;

    // The character at `end` enters the window.
    switch (str.charAt(end)) {
      case 'C':
        ++cCount;
        break;
      case 'T':
        ++tCount;
        break;
    }

    // Once the window no longer starts at index 0, the character just before
    // `start` has dropped out of it.
    if (start > 0) {
      switch (str.charAt(start - 1)) {
        case 'C':
          --cCount;
          break;
        case 'T':
          --tCount;
          break;
      }
    }

    // Three C's plus two T's fill all five slots of the window.
    if (cCount === 3 && tCount === 2) {
      return start;
    }
  }

  return -1;
}
// Sample sequence to scan; the start index of the first matching 5-character
// window (or -1) is printed.
var string = "TGATGCCGTCCCCTCAACTTGAGTGCTCCTAATGCGTTGC";
const firstWindowStart = c3t2_optimized(string);
console.log(firstWindowStart);
Or the same as above, just as a generator stepping through all possibly overlapping matches:
/**
 * Lazily yields the start index of every 5-character window of `str` that
 * contains exactly three 'C' and two 'T', in any order. Windows may overlap;
 * indices are produced in ascending order.
 *
 * @param {string} str - Text to scan.
 * @yields {number} Start index of a matching window.
 */
function* c3t2_optimized(str) {
  // Running tally of the two relevant letters inside the current window.
  const tally = { C: 0, T: 0 };

  for (let pos = 0; pos < str.length; ++pos) {
    // The character at `pos` joins the window.
    const entering = str.charAt(pos);
    if (entering === 'C' || entering === 'T') {
      tally[entering] += 1;
    }

    // From the sixth character onwards, the character five positions back
    // has left the window.
    if (pos >= 5) {
      const leaving = str.charAt(pos - 5);
      if (leaving === 'C' || leaving === 'T') {
        tally[leaving] -= 1;
      }
    }

    // Three C's plus two T's account for the whole 5-character window,
    // which starts four positions before `pos`.
    if (tally.C === 3 && tally.T === 2) {
      yield pos - 4;
    }
  }
}
var string = "CCCTTCCC";
// The loop variable is declared with `const`: a bare `for (i of ...)` assigns
// to an undeclared name, which creates an implicit global and throws a
// ReferenceError in strict mode / ES modules.
for (const start of c3t2_optimized(string)) {
  // Print each start index together with the 5-character segment found there.
  console.log(start, string.substring(start, start + 5));
}
Performance comparison: https://jsfiddle.net/24qguege/7/
Firefox 47: