function [] = sam2Mat_strandSpec_1dir(dir,inputFile,flip,name)
%SAM2MAT_STRANDSPEC_1DIR Convert a SAM file into strand-specific per-chromosome vectors.
%
%   sam2Mat_strandSpec_1dir(dir,inputFile,flip,name)
%
%   Inputs:
%     dir       - folder that contains chrLen.mat, from which the variable
%                 chromLengths is loaded. All entries but the last are used
%                 as chromosome lengths.
%     inputFile - path of the alignment file; must exist and end in '.sam'.
%     flip      - non-zero to invert the strand assigned to every read.
%                 Forced to 1 when name starts with 'dUTP', 'NNSR' or 'Park'.
%     name      - output prefix; results are saved to [name '.mat'].
%
%   Saved variables (cell arrays hold one single-precision row vector per
%   chromosome):
%     DataF, DataR             - counts at the first (F) / last (R) index of
%                                each accepted alignment span
%     DataFconv, DataRconv     - coverage over the whole alignment span
%                                (+1 per paired read, +2 per unpaired read)
%     DataFread1, DataRread1   - as DataF/DataR but for read1 only, using a
%                                fixed window of st1:(st1+36)
%     SubFread1, SubRread1     - read1 counts after subsampling to 2.5M
%                                reads (unchanged copy when fewer exist)
%     SubFread1_ext36, SubRread1_ext36 - subsampled counts convolved with a
%                                36-long box
%     sumF, sumR, sumFconv, sumRconv   - genome-wide totals
%     paired                   - paired flag of the last alignment line read
%
%   Errors:
%     prog:input - the input file is missing, is not a .sam file, or cannot
%                  be opened.

verbose(sprintf('Reading the input file %s',inputFile),10);

if ~exist(inputFile,'file')
    error('prog:input','Input file %s is missing. ',inputFile);
elseif length(inputFile)<4 || ~strcmp(inputFile(end-3:end),'.sam')
    % the length guard keeps the suffix test from indexing out of range
    error('prog:input','Input file %s should be a sam file. ',inputFile);
end

%% Initiate variables
load(sprintf('%s/chrLen.mat',dir),'chromLengths');

chrNum = length(chromLengths)-1;
DataFconv = cell(1,chrNum);
DataRconv = cell(1,chrNum);
DataF = cell(1,chrNum);
DataR = cell(1,chrNum);

SubFread1 = cell(1,chrNum);
SubRread1 = cell(1,chrNum);
DataFread1 = cell(1,chrNum);
DataRread1 = cell(1,chrNum);

for c=1:chrNum
    DataF{c} = zeros(1,chromLengths(c),'single');
    DataR{c} = zeros(1,chromLengths(c),'single');
    DataFread1{c} = zeros(1,chromLengths(c),'single');
    DataRread1{c} = zeros(1,chromLengths(c),'single');
    
    DataFconv{c} = zeros(1,chromLengths(c),'single');
    DataRconv{c} = zeros(1,chromLengths(c),'single');
end

%% Some libraries are flipped
% In the dUTP and NNSR libraries, the second read corresponds to the
% transcribed strand, and not the first, so the data should be
% flipped.
if length(name)>=4 && (strcmp(name(1:4),'dUTP') || strcmp(name(1:4),'NNSR') || strcmp(name(1:4),'Park'))
    verbose('data is flipped\n',10);
    flip = 1;
end

%% parse the sam file
% read the sam file line by line, and parse each mapping

f1 = fopen(inputFile);
if f1 < 0
    error('prog:input','Input file %s could not be opened. ',inputFile);
end
% make sure the file is closed even if parsing fails half way through
closeFile = onCleanup(@() fclose(f1));

chrCellDef = {'ref|NC_001133|','ref|NC_001134|','ref|NC_001135|','ref|NC_001136|','ref|NC_001137|','ref|NC_001138|','ref|NC_001139|','ref|NC_001140|','ref|NC_001141|','ref|NC_001142|','ref|NC_001143|','ref|NC_001144|','ref|NC_001145|','ref|NC_001146|','ref|NC_001147|','ref|NC_001148|','ref|NC_001224|'};
chrCell = {};
paired = 0; % defined up front so the final save works for a file with no alignments
i=1;
while(1)
    if (mod(i,1e6)==0), verbose(sprintf('currently at line %d\n',i),20);end
    i = i+1;
    t1 = fgetl(f1); if ~ischar(t1), break; end
    if isempty(strtrim(t1)), continue; end % blank lines carry no record
    
    % collect reference names from the @SQ header lines (text after the
    % first ':' of the second tab-separated field)
    if strncmp(t1,'@SQ',3)
        tmpR = regexp(t1,'\t','split');
        if numel(tmpR)>=2
            chrCell = [chrCell tmpR{2}(find(tmpR{2}==':',1,'first')+1:end)]; %#ok<AGROW>
        end
    end
    if t1(1)=='@', continue; end
    
    % NOTE(review): the header names are used only when more than one @SQ
    % line was seen; otherwise the built-in default list is used. Kept as
    % in the original - confirm whether a single-reference header should
    % also be honoured.
    if length(chrCell)>1,
        useChr = chrCell;
    else
        useChr = chrCellDef;
    end
    data = readSamLine(t1,useChr);
    
    tmp = bitget(data.Flag,1:11);
    flags = cell2struct(num2cell(tmp'),{'pairedInSeq','pairIsMapped','queryUnmapped','mateUnmapped','queryStrand','mateStrand','isRead1','isRead2','notPrimaryAlignment','failesQuality','isDuplicate'});
    
    paired = flags.pairedInSeq;
    
    if (paired && ~flags.pairIsMapped) || ...
            (paired && flags.queryStrand==flags.mateStrand) || ...
            (paired && abs(data.InsertSize)>500) || ...
            flags.notPrimaryAlignment
        % paired lib, and mate is not mapped,
        % paired lib, and mate is on the same strand
        % paired lib, and insert size is longer than 500
        % this is not the primary mapping
        continue;
    end
    
    % Skip reads that are unmapped or map to a reference we do not track.
    % isempty must be tested first: an empty operand to || is an error, and
    % ch is empty whenever the reference name is not in the name list.
    ch = data.ReferenceName;
    if flags.queryUnmapped || isempty(ch) || ch>chrNum, continue, end
    
    % Parse the CIGAR string into (count, operation) pairs; a CIGAR with no
    % operations (e.g. '*') cannot be placed and is skipped.
    cigarTok = regexp(data.CigarString,'(\d+)([MIDNSHP=X])','tokens');
    if isempty(cigarTok), continue; end
    cigarTok = vertcat(cigarTok{:});
    cigarNums = str2double(cigarTok(:,1));
    refConsuming = ismember([cigarTok{:,2}],'MDN=X'); % operations that advance along the reference
    matchLen = sum(cigarNums(refConsuming(:)));
    
    st1 = data.Position;
    % NOTE(review): st1:en1 is inclusive, so it spans matchLen+1 positions.
    % Kept as in the original because downstream outputs depend on it -
    % confirm whether en1 should be st1+matchLen-1.
    en1 = st1+matchLen;
    wa1 = ~flags.queryStrand; %(0=forward, 1=reverse)
    
    if flags.isRead2 %IF THIS IS READ2, you should flip it now.
        wa1 = ~wa1;
    end
    
    if flip %IF THIS IS flipped, you should flip it now. (it might be flipped twice, for read2 and flip flag)
        wa1 = ~wa1;
    end
    
    % unpaired reads are counted twice in the coverage vectors
    if paired
        num = 1;
    else
        num = 2;
    end
    ind = st1:en1;
    
    % fixed-length read1 window
    % NOTE(review): the original also assigned this window for read2 of a
    % flipped paired library, but never used it (only read1 is counted
    % below); that dead assignment was dropped.
    read1Start = st1;
    read1End = st1+36;
    
    if ind(end)>length(DataF{ch}) || ind(1)<=0, continue; end
    if wa1
        DataFconv{ch}(ind) = DataFconv{ch}(ind)+num;
        DataF{ch}(ind(1)) = DataF{ch}(ind(1))+1; %counting this one only as starts, no need to do internal normalization
        if flags.isRead1
            DataFread1{ch}(read1Start) = DataFread1{ch}(read1Start)+1; %counting this one only as starts, no need to do internal normalization
        end
    else
        DataRconv{ch}(ind) = DataRconv{ch}(ind)+num;
        DataR{ch}(ind(end)) = DataR{ch}(ind(end))+1; %counting this one only as starts, no need to do internal normalization
        % the fixed window may run past the chromosome end even when the
        % alignment span does not; such read1 starts are not counted
        if flags.isRead1 && read1End<=length(DataRread1{ch})
            DataRread1{ch}(read1End) = DataRread1{ch}(read1End)+1; %counting this one only as starts, no need to do internal normalization
        end
    end
end
clear closeFile; % closes the input file
%% save for now (before sampling)
save(sprintf('%s.mat',name),'DataF', 'DataR','DataFconv','DataRconv');

%% total number of reads mapped
sumF = 0; sumR = 0;
sumFconv = 0; sumRconv = 0;
for c=1:chrNum
    sumF = sumF + sum(DataF{c});
    sumR = sumR + sum(DataR{c});
    sumFconv = sumFconv + sum(DataFconv{c});
    sumRconv = sumRconv + sum(DataRconv{c});
end

%% prepare to subsample
% calc relative numbers for chromosomes
totalReads = zeros(2,chrNum);
for c=1:chrNum
    totalReads(1,c) = sum(DataFread1{c});
    totalReads(2,c) = sum(DataRread1{c});
end
totalFrac = totalReads./sum(totalReads(:));

%% subsample 2.5M reads
% this step takes a while

subN = 2500000;
noSub = sum(totalReads(:))<subN; % too few reads: keep everything
for c=1:chrNum
    if noSub
        SubFread1{c} = DataFread1{c};
        SubRread1{c} = DataRread1{c};
    else
        verbose(sprintf('subsampling chr%d (F strand)\n',c),20);
        SubFread1{c} = subSampStarts(DataFread1{c},ceil(totalFrac(1,c)*subN));
        verbose(sprintf('subsampling chr%d (R strand)\n',c),20);
        SubRread1{c} = subSampStarts(DataRread1{c},ceil(totalFrac(2,c)*subN));
    end
end

%% convolve Sub
% forward starts are extended towards higher indices, reverse starts
% towards lower indices; both results keep the chromosome length
chrLengths = cellfun(@length,DataF);
SubFread1_ext36 = cell(size(SubFread1));
SubRread1_ext36 = cell(size(SubFread1));
for c=1:chrNum
    tF = conv(SubFread1{c},ones(1,36));
    SubFread1_ext36{c} = tF(1:chrLengths(c));
    tR = conv(SubRread1{c},ones(1,36));
    SubRread1_ext36{c} = tR(36:end);
end


%% save into mat files
save(sprintf('%s.mat',name),'DataF', 'DataR','DataFconv','DataRconv',...
    'sumF', 'sumR', 'sumFconv', 'sumRconv', ...
    'SubFread1','SubRread1','DataFread1', 'DataRread1','paired',...
    'SubFread1_ext36','SubRread1_ext36');

end


%%

function data = readSamLine(tline,chrCell)
%READSAMLINE Parse one SAM alignment line into a struct.
%
%   data = readSamLine(tline,chrCell)
%
%   Inputs:
%     tline   - one tab-delimited alignment line (not a header line).
%     chrCell - cell array of reference names; the position of the line's
%               reference name in this list becomes data.ReferenceName.
%
%   Output:
%     data - struct with fields QueryName, Flag (uint16), ReferenceName,
%            Position (uint32), MappingQuality (uint8), CigarString,
%            MateReferenceName, MatePosition (uint32), InsertSize (int32),
%            Sequence, Quality and Tags.
%            ReferenceName is the index of the first matching entry of
%            chrCell, or empty when the name is not in the list.
%            Tags is a struct keyed by tag name ('i' tags as int32, 'f'
%            tags as single, 'H' tags as uint8 bytes, other tags as text),
%            or [] when the line has no optional fields.
%
%   Errors:
%     Bioinfo:samread:ErrorInFile - an optional field could not be parsed.

% the eleven mandatory columns
[rtext, pos] = textscan(tline, '%s%u16%s%u32%u8%s%s%u32%d32%s%s',1, 'Delimiter', '\t');

% textscan wraps every string column in its own cell; unwrap them
rtext([1 3 6 7 10 11])=[rtext{[1 3 6 7 10 11]}];

% whatever follows the mandatory columns is TAG:TYPE:VALUE triplets
optional_fields = regexp(tline(pos+1:end), '([^\s:]{2}):([AifZH]):(\S+)\t?', 'tokens');

if ~isempty(optional_fields)
    optional_fields = [optional_fields{:}]';
    tags = optional_fields(1:3:end);
    type = [optional_fields{2:3:end}];
    value = optional_fields(3:3:end);
    
    % Numeric tags are converted with sscanf rather than str2num: str2num
    % evaluates its input as code, which is unsafe for text read from a file.
    integers = type == 'i';
    if any(integers)
        nums = sscanf(sprintf('%s ',value{integers}),'%d');
        if numel(nums)~=sum(integers)
            error('Bioinfo:samread:ErrorInFile', 'Error in file at line: %s', tline)
        end
        value(integers) = num2cell(int32(nums));
    end
    
    floats = type == 'f';
    if any(floats)
        nums = sscanf(sprintf('%s ',value{floats}),'%f');
        if numel(nums)~=sum(floats)
            error('Bioinfo:samread:ErrorInFile', 'Error in file at line: %s', tline)
        end
        value(floats) = num2cell(single(nums));
    end
    
    % hex strings become a row of bytes, two hex digits per byte
    hexstrings = strfind(type, 'H');
    for hex_idx = 1:numel(hexstrings)
        value{hexstrings(hex_idx)} =  uint8(hex2dec(reshape(value{hexstrings(hex_idx)}, 2, [])')');
    end
    
    if numel(value)~=numel(tags)
        % the original message used line counters that do not exist in
        % this function; report the offending line text instead
        error('Bioinfo:samread:ErrorInFile', 'Error in file at line: %s', tline)
    end
    
    data = cell2struct([rtext'; {cell2struct(value, tags)}], {'QueryName', 'Flag', 'ReferenceName', 'Position', ...
        'MappingQuality', 'CigarString', 'MateReferenceName', 'MatePosition', 'InsertSize', 'Sequence', 'Quality', 'Tags'});
    
else
    rtext(end+1) = {[]};
    data = cell2struct(rtext', {'QueryName', 'Flag', 'ReferenceName', 'Position', 'MappingQuality', 'CigarString', ...
        'MateReferenceName', 'MatePosition', 'InsertSize', 'Sequence', 'Quality', 'Tags'});
end %isempty(optional_fields)

% translate the reference name into its index in chrCell; taking the first
% match keeps the result scalar (or empty) even if the list has duplicates
data.ReferenceName = find(strcmp(chrCell,data.ReferenceName),1,'first');
end

%%
