題目內容
題目頁面: https://app.codility.com/programmers/lessons/5-prefix_sums/genomic_range_query/
題目理解
天啊~我最討厭有故事性的題目了如汽車或DNA,好長阿~~~讓我認真看他在說啥!
DNA和factor可表示為: {‘A’:1,’C’:2,’G’:3,’T’:4},S = CAGCCTA,P=[2,5,0],Q=[4,5,6]
然後要找第 K 個查詢(0 ≤ K < M),找到 P[K] 和 Q[K](含)之間的 DNA 序列中包含的核苷酸的最小影響因子。也就是要計算出一個長度為M(M為P、Q的長度)的陣列,然後返回最小影響因子
例如M[0] = Math.min(S[2],S[3]) = Math.min(‘G’,’C’) = Math.min(2,3) =2
陷阱在這: each element of arrays P and Q is an integer within the range [0..N – 1]; => 所以不一定一樣長
解題思路
先用暴力法寫出一個可行解
// you can write to stdout for debugging purposes, e.g.
// console.log('this is a debug message');
function solution(S, P, Q) {
// Implement your solution here
factor = {'A':1,'C':2,'G':3,'T':4}
var length_M = Math.min(P.length, Q.length)
var M = []
for( var i = 0 ; i < length_M; i++){
var inclusive_S = S.slice(P[i], Q[i]+1).split("")
var minimum = Infinity
for(var j =0;j<inclusive_S.length;j++){
//console.log(minimum, factor[inclusive_S[j]])
if(minimum > factor[inclusive_S[j]]){
minimum = factor[inclusive_S[j]]
}
}
M.push(minimum)
}
return M
}
因為邏輯有一點複雜,先把暴力解送出驗證一下正確性,之後再來優化
好的!!62%!!至少邏輯正確,那我們在想想要如何優化。讓我先觀察一下是死在那些效能測試上。分別是GGGGGG..??..GGGGGG..??..GGGGGG、large random string, length、all max ranges,首先先看到almost_all_same_letters,所以先把CAGC對應數字的部分做掉,如: S = CAGCCTA = [2,1,3,2,2,4,1]
下面為改善後的程式碼,可以減少重複將字串轉換為數字的部分。雖然沒有很樂觀,因為還是有先暫存中間值的可能性,先送看看有沒有改善,並評估一下效能瓶頸是否有改變。
function solution(S, P, Q) {
// Implement your solution here
factor = {'A':1,'C':2,'G':3,'T':4}
var length_M = Math.min(P.length, Q.length)
var S = S.split("").map(x => factor[x])
var M = []
for( var i = 0 ; i < length_M; i++){
M.push(Math.min(...S.slice(P[i], Q[i]+1)))
}
return M
}
結果並沒有改善
— M = 0
minimum: Infinity
minimum: 3
minimum: 2
— M = 1
minimum: Infinity
— M = 2
minimum: Infinity
minimum: 2
minimum: 1
minimum: 1
minimum: 1
minimum: 1
minimum: 1
觀察一下現在的迴圈狀況,來找尋可改善的重複步驟,首先最明顯的,就是若遇到1應該可直接break,但我猜這影響不大。另外我發現,因為factor = {‘A’:1,’C’:2,’G’:3,’T’:4}是固定的,且最多就是4種,所以與其在那邊一個個轉換並且比較大小,還不如用4個if…else來從小尋找到最大的值。
function solution(S, P, Q) {
// Implement your solution here
//factor = {'A':1,'C':2,'G':3,'T':4}
var length_M = Math.min(P.length, Q.length)
var M = []
for( var i = 0 ; i < length_M; i++){
var inclusive_S = S.slice(P[i], Q[i]+1).split("")
//console.log(inclusive_S)
if(inclusive_S.indexOf('A') >= 0) M.push(1)
else if(inclusive_S.indexOf('C') >= 0) M.push(2)
else if(inclusive_S.indexOf('G') >= 0) M.push(3)
else M.push(4)
}
return M
}
仍然沒有好轉
事已至此,感覺卡在一個切入觀點了,所以只好上網搜尋一下: http://myjavawar.blogspot.com/2018/10/genomicrangequery.html
原來要改變一下思考方向,要把原本的思維從”在一個已被切好的陣列中,搜尋某字元有沒有出現過”的思維,改成”計算從0到M以來某字元出現的次數”。接著若是陣列要從N開始,就直接把M-N,就會是從N~M之間某字元出現的次數”了
function solution(S, P, Q) {
// Implement your solution here
//factor = {'A':1,'C':2,'G':3,'T':4}
var length_M = Math.min(P.length, Q.length)
var M = []
var showTimes = []
var currentShowTime = {'A':0,'C':0,'G':0,'T':0}
for( var i = 0 ; i < S.length; i++){
currentShowTime[S[i]]++
showTimes.push({'A':currentShowTime['A'],'C':currentShowTime['C'],'G':currentShowTime['G'],'T':currentShowTime['T']})
}
for( var i = 0 ; i < length_M; i++){
//var inclusive_S = S.slice(P[i], Q[i]+1).split("")
//console.log(inclusive_S)
if(showTimes[Q[i]]['A'] - showTimes[P[i]]['A'] > 0) M.push(1)
else if(showTimes[Q[i]]['C'] - showTimes[P[i]]['C'] > 0) M.push(2)
else if(showTimes[Q[i]]['G'] - showTimes[P[i]]['G'] > 0) M.push(3)
else M.push(4)
}
return M
}
結果雖然效率過了,但是會有錯誤的結果,檢查了一下,這是因為當P[i]與Q[i]一樣大小時,就會出現錯誤,所以要加一個if用別的方法找答案
function solution(S, P, Q) {
// Implement your solution here
//factor = {'A':1,'C':2,'G':3,'T':4}
var length_M = Math.min(P.length, Q.length)
var M = []
var showTimes = []
var currentShowTime = {'A':0,'C':0,'G':0,'T':0}
for( var i = 0 ; i < S.length; i++){
currentShowTime[S[i]]++
showTimes.push({'A':currentShowTime['A'],'C':currentShowTime['C'],'G':currentShowTime['G'],'T':currentShowTime['T']})
}
//console.log(showTimes)
for( var i = 0 ; i < length_M; i++){
//var inclusive_S = S.slice(P[i], Q[i]+1).split("")
//console.log(inclusive_S)
if(P[i] == Q[i]){
var factor = {'A':1,'C':2,'G':3,'T':4}
M.push(factor[S[P[i]]])
}else{
if(showTimes[Q[i]]['A'] - showTimes[P[i]]['A'] > 0) M.push(1)
else if(showTimes[Q[i]]['C'] - showTimes[P[i]]['C'] > 0) M.push(2)
else if(showTimes[Q[i]]['G'] - showTimes[P[i]]['G'] > 0) M.push(3)
else if(showTimes[Q[i]]['T'] - showTimes[P[i]]['T'] > 0) M.push(4)
}
}
return M
}
結果還是有錯
仔細檢查了一下,錯誤的情況是solution(‘AC’, [0, 0, 1], [0, 1, 1]),發現P[i] 應該要減1,但這樣子P[i]==0時會出現問題,所以要給個預設值
function solution(S, P, Q) {
// Implement your solution here
//factor = {'A':1,'C':2,'G':3,'T':4}
var length_M = Math.min(P.length, Q.length)
var M = []
var showTimes = []
var currentShowTime = {'A':0,'C':0,'G':0,'T':0}
for( var i = 0 ; i < S.length; i++){
currentShowTime[S[i]]++
showTimes.push({'A':currentShowTime['A'],'C':currentShowTime['C'],'G':currentShowTime['G'],'T':currentShowTime['T']})
}
//console.log(showTimes)
for( var i = 0 ; i < length_M; i++){
var inclusive_S = S.slice(P[i], Q[i]+1).split("")
//console.log(inclusive_S)
if(P[i] == Q[i]){
var factor = {'A':1,'C':2,'G':3,'T':4}
M.push(factor[S[P[i]]])
}else{
var showTimesMax = showTimes[Q[i]]
var showTimesMin = P[i] == 0 ? {'A':0,'C':0,'G':0,'T':0}:showTimes[P[i]-1]
//console.log(showTimesMax, showTimesMin)
if(showTimesMax['A'] - showTimesMin['A'] > 0) M.push(1)
else if(showTimesMax['C'] - showTimesMin['C'] > 0) M.push(2)
else if(showTimesMax['G'] - showTimesMin['G'] > 0) M.push(3)
else M.push(4)
}
}
return M
}
好了~終於100%了,真是個身心俱疲的一題阿! (https://app.codility.com/demo/results/trainingYXRWFN-H4J/)