#ifndef POLYNOMIAL_HH
#define POLYNOMIAL_HH
#ifndef CONVOLUTION_HH
#define CONVOLUTION_HH
#ifndef NUMBER_THEORY_HH
#define NUMBER_THEORY_HH
#include<array>
#include<functional>
#include<limits>
#include<numeric>
#include<ostream>
#include<type_traits>
#include<unordered_map>
#include<utility>
#include<vector>
#include<cmath>
#define U32 unsigned
#define I64 long long
#define U64 unsigned long long
struct PrimeEnumerator{
static std::pair<std::vector<int>, std::function<bool(int)>> leq(int n){
// assert(n >= 0);
int b = n/30+1;
std::vector<bool> isp(b<<3, true);
std::vector<int> primes{2, 3, 5};
primes.reserve(UB*30*b/std::log(30*b));
isp[0] = false;
for(int i=0; i<b; ++i){
for(int j=0; j<8; ++j){
I64 d = 30ll*i + r[j];
if(d == 1) continue;
if(isp[i<<3|j]) primes.push_back(d);
for(int k=3; ; ++k){
I64 pd = primes[k]*d;
if(pd >= 30*b) break;
isp[pd/30<<3|rinv[pd%30]] = false;
if(d%primes[k] == 0) break;
}
}
}
while(!primes.empty() && primes.back()>n){
primes.pop_back();
}
auto is_prime = [isp=move(isp)](int i){
if(i <= 1) return false;
if(i==2 || i==3 || i==5) return true;
if(i%2==0 || i%3==0 || i%5==0) return false;
return (bool)isp[i/30<<3|rinv[i%30]];
};
return {primes, is_prime};
}
private:
static constexpr int r[] = {1, 7, 11, 13, 17, 19, 23, 29};
static constexpr signed char rinv[] = {
-1, 0, -1, -1, -1, -1, -1, 1, -1, -1,
-1, 2, -1, 3, -1, -1, -1, 4, -1, 5,
-1, -1, -1, 6, -1, -1, -1, -1, -1, 7
};
static constexpr double UB = 1.25506;
};
inline std::vector<int> phi_table_leq(int n){
// assert(n >= -1);
std::vector<int> phi(n+1u);
std::vector<int> primes;
iota(phi.begin(), phi.end(), 0u);
for(I64 i=2; i<=n; ++i){
if(phi[i] == i){
--phi[i];
primes.push_back(i);
}
for(int p: primes){
if(p*i > n) break;
if(i%p){
phi[p*i] = (p-1)*phi[i];
}else{
phi[p*i] = p*phi[i];
break;
}
}
}
return phi;
}
inline std::vector<signed char> mu_table_leq(int n){
// assert(n >= 1);
std::vector<signed char> mu(n+1u, -2);
std::vector<int> primes;
mu[0] = 0; mu[1] = 1;
for(I64 i=2; i<=n; ++i){
if(mu[i] == -2){
mu[i] = -1;
primes.push_back(i);
}
for(int p: primes){
if(p*i > n) break;
if(i%p){
mu[p*i] = -mu[i];
}else{
mu[p*i] = 0;
break;
}
}
}
return mu;
}
constexpr std::array<I64, 2> extgcd(I64 a, I64 b){
// assert(a>=0 && b>=0);
if(b == 0) return {1, 0};
auto [x, y] = extgcd(b, a%b);
return {y, x-a/b*y};
}
constexpr I64 inv_mod(I64 a, I64 mod){
// assert(mod>1 && std::gcd(mod, a%mod)==1);
if((a%=mod) < 0) a += mod;
auto [_, x] = extgcd(mod, a);
return x<0? x+mod: x;
}
constexpr U32 pow_mod(I64 a, I64 n, U32 mod){
// assert(mod > 0);
if(mod == 1) return 0;
if((a%=mod) < 0) a += mod;
if(n < 0) a = inv_mod(a, mod);
U64 res = 1, t = a;
while(n){
if(n%2) res = res*t%mod;
if(n/=2) t = t*t%mod;
}
return res;
}
constexpr bool miller_rabin(U32 n, U32 witness){
// assert(n>=3 && n%2==0);
if((witness%=n) == 0) return true;
U32 d = n-1;
int r = 0;
while(d%2 == 0){
d>>=1, ++r;
}
U32 x = pow_mod(witness, d, n);
if(x==1 || x==n-1) return true;
for(int cnt=r-1; cnt-->0; ){
x = (U64)x*x%n;
if(x == n-1) return true;
}
return false;
}
constexpr bool primality_test(U32 n) noexcept{
if(n <= 1) return false;
if(n == 2) return true;
if(n%2 == 0) return false;
return miller_rabin(n, 2) && (n < 2047 || miller_rabin(n, 7) && miller_rabin(n, 61));
}
constexpr bool primality_test(int n) noexcept{
return n<0? false: primality_test((U32)n);
}
constexpr int jacobi_symbol(I64 a, I64 b){
// assert(b>=1 && b%2==1);
if((a%=b) < 0) a += b;
bool ok = true;
while(a){
bool n2 = false;
while(a%2 == 0){
a>>=1, n2=!n2;
}
if(n2 && (b%8==3 || b%8==5)){
ok = !ok;
}
if(a%4==3 && b%4==3){
ok = !ok;
}
int t = b%a; b = a; a = t;
}
return b>1? 0: ok? 1: -1;
}
constexpr I64 sqrt_modp(I64 a, U32 p){
// assert(primality_test(p));
if((a%=p) < 0) a += p;
if(a <= 1) return a;
if(jacobi_symbol(a, p) == -1) return -1;
if(p%4 == 3) return pow_mod(a, (p+1ll)/4, p);
I64 k = 1;
while(jacobi_symbol(k*k-a, p) == 1) ++k;
if(k*k%p == a) return k;
U64 w = ((k*k-a)%p+p)%p;
U64 rc = 1, rx = 0, tc = k, tx = 1;
for(U64 i=(p+1ll)/2; i>0; ){
if(i%2){
U64 rc2 = (rc*tc%p + rx*tx%p*w)%p;
rx = (rc*tx%p + rx*tc)%p;
rc = rc2;
}
if(i/=2){
U64 tc2 = (tc*tc%p + tx*tx%p*w)%p;
tx = (tc<p-tc? 2*tc: tc-(p-tc))*tx%p;
tc = tc2;
}
}
if(rc > p-rc) rc = p-rc;
return rc;
}
constexpr bool primitive_root_modp_check(U32 r, U32 p){
// assert(primality_test(p));
if(r%p == 0) return false;
U32 d = p-1;
for(I64 q=2; q*q<=d; ++q) if(d%q == 0){
do d/=q; while(d%q==0);
if(pow_mod(r, (p-1)/q, p) == 1) return false;
}
if(d>1 && pow_mod(r, (p-1)/d, p) == 1){
return false;
}
return true;
}
constexpr U32 primitive_root_modp(U32 p){
// assert(primality_test(p));
for(U32 i=1; ; ++i){
if(primitive_root_modp_check(i, p)){
return i;
}
}
}
inline I64 discrete_log(I64 base, I64 power, U32 mod){
// assert(mod > 0);
if((base%=mod) < 0) base += mod;
if((power%=mod) < 0) power += mod;
if(mod==1 || power==1) return 0;
U32 s = 1, d = 1, res = 0;
while(1){
s = (U64)base*s%mod;
++res;
if(s == power) return res;
U32 d2 = std::gcd(s, mod);
if(d2 == d){
if(power%d) return -1;
s /= d; power /= d; mod /= d;
break;
}
d = d2;
}
U32 m = std::ceil(std::sqrt(mod));
std::unordered_map<U32, U32> bs(m);
for(U32 i=0; i<m; ++i){
bs.emplace(s, i);
s = (U64)base*s%mod;
}
U32 gs = pow_mod(base, -(I64)m, mod);
for(U32 i=0; i<m; ++i){
if(auto it=bs.find(power); it!=bs.end()){
return res + i*m + it->second;
}
power = (U64)gs*power%mod;
}
return -1;
}
template<U32 N> struct RingZn{
using Base = std::conditional_t<(N<=(U32)std::numeric_limits<int>::max()), int, U32>;
static constexpr U32 mod() noexcept{
return N;
}
RingZn() = default;
template<typename INT, typename = std::enable_if_t<std::is_integral_v<INT>>>
constexpr RingZn(INT a) noexcept: a((a%=(I64)N)<0? (I64)a+N: a){}
template<typename INT, typename = std::enable_if_t<std::is_arithmetic_v<INT>>>
constexpr explicit operator INT() const noexcept{
return a;
}
constexpr RingZn operator+() const noexcept{
return *this;
}
constexpr RingZn operator-() const noexcept{
return a? (Base)N-a: 0;
}
constexpr RingZn &operator+=(RingZn rhs) noexcept{
a = a<(Base)N-rhs.a? a+rhs.a: a-((Base)N-rhs.a);
return *this;
}
constexpr RingZn &operator-=(RingZn rhs) noexcept{
a = a<rhs.a? a+((Base)N-rhs.a): a-rhs.a;
return *this;
}
constexpr RingZn &operator*=(RingZn rhs) noexcept{
a = (U64)a * rhs.a % N;
return *this;
}
constexpr RingZn &operator/=(RingZn rhs) noexcept{
a = (U64)a * inv_mod(rhs.a, N) % N;
return *this;
}
constexpr RingZn &operator++() noexcept{
a = a==(Base)N-1? 0: a+1;
return *this;
}
constexpr RingZn operator++(int) noexcept{
RingZn res = *this; ++*this;
return res;
}
constexpr RingZn &operator--() noexcept{
a = a? a-1: (Base)N-1;
return *this;
}
constexpr RingZn operator--(int) noexcept{
RingZn res = *this; --*this;
return res;
}
#define DEF_BIOP(op)\
friend constexpr RingZn operator op(RingZn lhs, RingZn rhs) noexcept{\
return lhs op##= rhs;\
}
DEF_BIOP(+)
DEF_BIOP(-)
DEF_BIOP(*)
DEF_BIOP(/)
#undef DEF_BIOP
friend constexpr bool operator==(RingZn lhs, RingZn rhs) noexcept{
return (U32)lhs == (U32)rhs;
}
friend constexpr bool operator!=(RingZn lhs, RingZn rhs) noexcept{
return !(lhs == rhs);
}
private:
Base a = 0;
};
template<U32 N> RingZn<N> pow(RingZn<N> a, I64 b){
return pow_mod((I64)a, b, N);
}
template<U32 N> std::ostream &operator<<(std::ostream &os, RingZn<N> a){
return os << (U32)a;
}
template<typename> struct RingZnMod: std::integral_constant<U32, 0>{};
template<U32 N> struct RingZnMod<RingZn<N>>: std::integral_constant<U32, N>{};
#undef U64
#undef I64
#undef U32
#endif
#ifndef UTIL_HH
#define UTIL_HH
#include<iterator>
template<typename It>
using DerefType = typename std::iterator_traits<It>::value_type;
#endif
#include<algorithm>
#include<complex>
#include<utility>
#include<vector>
#include<cmath>
#define U32 unsigned
#define I64 long long
#define U64 unsigned long long
template<U32 P, U32 R> struct BasicNtt{
static constexpr U32 prime() noexcept{
return P;
}
static constexpr U32 primitive_root() noexcept{
return R;
}
static constexpr int lg_max_size() noexcept{
if(primality_test(P) && primitive_root_modp_check(R, P)){
U32 d = P-1, res = 0;
if((d&65535) == 0) d>>=16, res|=16;
if((d&255) == 0) d>>=8, res|=8;
if((d&15) == 0) d>>=4, res|=4;
if((d&3) == 0) d>>=2, res|=2;
if((d&1) == 0) res|=1;
return res;
}
return -1;
}
static constexpr int max_size() noexcept{
int lg = lg_max_size();
return lg>=0? 1<<lg: 0;
}
template<typename It>
static void transform_in_place(bool inverse, It x, int n){
// assert(n>=0 && (n&-n)==n && n<=max_size());
using T = DerefType<It>;
for(int i=1, j=0; i<n; ++i){
for(int k=n>>1; !((j^=k)&k); k>>=1);
if(i < j) std::swap(x[i], x[j]);
}
RingZn<P> z2[lg_max_size()+1] = {pow_mod(R, inverse? (P-1)/n: (P-1)-(P-1)/n, P)};
int lg = 0;
for(; z2[lg]!=1; ++lg){
z2[lg+1] = z2[lg]*z2[lg];
}
std::reverse(z2, z2+lg+1);
for(int i=1; 1<<i<=n; ++i){
for(int j=0; j<n; j+=1<<i){
RingZn<P> t = 1;
for(int k=0; k<1<<(i-1); ++k){
RingZn<P> u = x[j|k], v = t*x[j|1<<(i-1)|k];
x[j|k] = static_cast<T>(u+v);
x[j|1<<(i-1)|k] = static_cast<T>(u-v);
t *= z2[i];
}
}
}
if(inverse){
RingZn<P> factor = P-(P-1)/n;
for(int i=0; i<n; ++i){
x[i] = static_cast<T>(factor*x[i]);
}
}
}
template<typename It>
static std::vector<RingZn<P>> transform(bool inverse, It b, It e, int n){
std::vector<RingZn<P>> x(n);
int d = e-b<n? e-b: n;
copy(b, b+d, x.begin());
transform_in_place(inverse, x.begin(), n);
return x;
}
};
template<U32 P> using Ntt = BasicNtt<P, primitive_root_modp(P)>;
using NttI32 = Ntt<(15<<27|1)>;
using NttU32 = Ntt<(3u<<30|1)>;
template<typename T> struct BasicFft{
BasicFft() = default;
template<typename It> void transform_in_place(bool inverse, It x, int n){
// assert(n>=0 && (n&-n)==n);
for(int i=1, j=0; i<n; ++i){
for(int k=n>>1; !((j^=k)&k); k>>=1);
if(i < j) std::swap(x[i], x[j]);
}
if((int)table.size() < n) calc_exp(n);
for(int i=1; 1<<i<=n; ++i){
int d = inverse? table.size()>>i: table.size()-(table.size()>>i);
for(int j=0; j<n; j+=1<<i){
for(int k=0, a=0; k<1<<(i-1); ++k, a=a+d&table.size()-1){
std::complex<T> u = x[j|k], v = table[a]*x[j|1<<(i-1)|k];
x[j|k] = u+v;
x[j|1<<(i-1)|k] = u-v;
}
}
}
if(inverse){
for(int i=0; i<n; ++i){
x[i] /= n;
}
}
}
template<typename It>
std::vector<std::complex<T>> transform(bool inverse, It b, It e, int n){
std::vector<std::complex<T>> x(n);
copy_n(b, e-b<n? e-b: n, x.begin());
transform_in_place(inverse, x.begin(), n);
return x;
}
private:
static constexpr T PI = 3.14159265358979323846l;
void calc_exp(int n){
if(n == 1){
table = {1};
}else if(n == 2){
table = {1, -1};
}else if(n == 4){
table = {1, {0, 1}, -1, {0, -1}};
}else if(n >= 8){
table.resize(n);
table[0] = 1;
for(int i=1; i<n/8; ++i){
table[i] = {std::cos(2*PI*i/n), std::sin(2*PI*i/n)};
}
T cos45 = std::sqrt((T).5);
table[n/8] = {cos45, cos45};
for(int i=n/8+1; i<=n/4; ++i){
table[i] = {table[n/4-i].imag(), table[n/4-i].real()};
}
for(int i=n/4+1; i<=n/2; ++i){
table[i] = {-table[n/2-i].real(), table[n/2-i].imag()};
}
for(int i=n/2+1; i<n; ++i){
table[i] = -table[i-n/2];
}
}
}
std::vector<std::complex<T>> table;
};
using Fft = BasicFft<double>;
using Fftf = BasicFft<float>;
using Fftl = BasicFft<long double>;
namespace impl{
template<U32 N>
std::vector<RingZn<N>> convolution_pow2(std::vector<RingZn<N>> x, std::vector<RingZn<N>> y){
int n = x.size();
// assert(x.size()==y.size() && (n&-n)==n);
if(primality_test(N) && n <= Ntt<N>::max_size()){
if(x == y){
Ntt<N>::transform_in_place(false, x.begin(), n);
for(int i=0; i<n; ++i){
x[i] *= x[i];
}
}else{
Ntt<N>::transform_in_place(false, x.begin(), n);
Ntt<N>::transform_in_place(false, y.begin(), n);
for(int i=0; i<n; ++i){
x[i] *= y[i];
}
}
Ntt<N>::transform_in_place(true, x.begin(), n);
return x;
}
std::vector<std::complex<double>> z(n);
for(int i=0; i<n; ++i) z[i] = {(double)x[i], (double)y[i]};
Fft fft;
fft.transform_in_place(false, z.begin(), n);
for(int i=0; i<n; ++i){
z[i] *= z[i];
}
fft.transform_in_place(true, z.begin(), n);
if((double)n*N*N < 2e15){
for(int i=0; i<n; ++i){
x[i] = (U64)round(z[i].imag()/2);
}
return x;
}
std::vector<RingZn<NttU32::prime()>> a(n);
for(int i=0; i<n; ++i){
a[i] = (U32)x[i];
}
NttU32::transform_in_place(false, a.begin(), n);
if(x == y){
for(int i=0; i<n; ++i){
a[i] *= a[i];
}
}else{
std::vector<RingZn<NttU32::prime()>> b(n);
for(int i=0; i<n; ++i){
b[i] = (U32)y[i];
}
NttU32::transform_in_place(false, b.begin(), n);
for(int i=0; i<n; ++i){
a[i] *= b[i];
}
}
NttU32::transform_in_place(true, a.begin(), n);
for(int i=0; i<n; ++i){
U64 q = round((z[i].imag()/2-(U64)a[i])/NttU32::prime());
x[i] = (q%N)*NttU32::prime()+(U64)a[i];
}
return x;
}
}
template<
typename InputIt1,
typename InputIt2,
typename OutputIt,
typename = std::enable_if_t<
RingZnMod<DerefType<InputIt1>>::value == RingZnMod<DerefType<InputIt2>>::value
&& RingZnMod<DerefType<InputIt2>>::value == RingZnMod<DerefType<OutputIt>>::value
&& RingZnMod<DerefType<OutputIt>>::value != 0
>
> void convolution(InputIt1 b1, InputIt1 e1, InputIt2 b2, InputIt2 e2, int n, OutputIt res){
constexpr U32 N = RingZnMod<DerefType<InputIt1>>::value;
if(n <= 0) return;
int n1 = std::min((int)(e1-b1), n);
int n2 = std::min((int)(e2-b2), n);
if(n1<=32 || n2<=32){
std::vector<RingZn<N>> t(n);
for(int i=0; i<n1; ++i) for(int j=0; j<n2; ++j){
t[(i+j)%n] += b1[i]*b2[j];
}
copy(t.cbegin(), t.cend(), res);
return;
}
int nf = n;
while(nf != (nf&-nf)) nf += nf&-nf;
if(nf>n && n<n1+n2-1) nf *= 2;
std::vector<RingZn<N>> x(nf), y(nf);
copy(b1, b1+n1, x.begin());
copy(b2, b2+n2, y.begin());
if(nf > 2*n){
copy(b1+1, b1+n1, x.end()-n+1);
}
x = impl::convolution_pow2(move(x), move(y));
copy(x.cbegin(), x.cbegin()+n, res);
}
template<typename InputIt1, typename InputIt2, typename OutputIt>
void convolution_int(InputIt1 b1, InputIt1 e1, InputIt2 b2, InputIt2 e2, int n, OutputIt res){
if(n <= 0) return;
using OutT = DerefType<OutputIt>;
int n1 = e1-b1, n2 = e2-b2;
n1 = std::min(n1, n); n2 = std::min(n2, n);
if(n1<=32 || n2<=32){
std::vector<OutT> t(n);
for(int i=0; i<n1; ++i) for(int j=0; j<n2; ++j){
t[(i+j)%n] += static_cast<OutT>(b1[i])*static_cast<OutT>(b2[j]);
}
std::copy(t.cbegin(), t.cend(), res);
return;
}
int n_fft = n;
while((n_fft & -n_fft) != n_fft){
n_fft += n_fft & -n_fft;
}
if(n_fft>n && n<n1+n2-1){
n_fft <<= 1;
}
std::vector<std::complex<double>> x(n_fft);
for(int i=0; i<n1; ++i){
x[i] = b1[i];
}
for(int i=0; i<n2; ++i){
x[i] += std::complex<double>(0, b2[i]);
}
if(n_fft > 2*n){
for(int i=1; i<n1; ++i){
x[n_fft-n+i] = b1[i];
}
}
Fft fft;
fft.transform_in_place(false, x.begin(), n_fft);
for(int i=0; i<n_fft; ++i){
x[i] *= x[i];
}
fft.transform_in_place(true, x.begin(), n_fft);
for(int i=0; i<n; ++i){
res[i] = static_cast<OutT>(round(x[i].imag()/2));
}
}
#undef U64
#undef I64
#undef U32
#endif
#include<algorithm>
#include<functional>
#include<initializer_list>
#include<optional>
#include<vector>
#define U32 unsigned
#define I64 long long
#define U64 unsigned long long
namespace impl{
template<typename RingT>
int fps_deg(std::vector<RingT> const &f) noexcept{
auto it = find_if(f.crbegin(), f.crend(), [](RingT const &ci){return ci != 0;});
return it.base()-f.cbegin()-1;
}
template<typename RingT>
std::vector<RingT> &fps_add_assign(std::vector<RingT> &f, std::vector<RingT> const &g){
int d2 = fps_deg(g);
if((int)f.size() < d2+1) f.resize(d2+1);
for(int i=0; i<=d2; ++i){
f[i] += g[i];
}
return f;
}
template<typename RingT>
std::vector<RingT> &fps_sub_assign(std::vector<RingT> &f, std::vector<RingT> const &g){
int d2 = fps_deg(g);
if((int)f.size() < d2+1) f.resize(d2+1);
for(int i=0; i<=d2; ++i){
f[i] -= g[i];
}
return f;
}
template<typename RingT>
std::vector<RingT> fps_naive_mul(RingT const *f, int deg_f, RingT const *g, int deg_g, int lo, int hi){
std::vector<RingT> res(hi-lo+1, 0);
for(int i=std::max(lo-deg_g, 0); i<=std::min(deg_f, hi); ++i){
for(int j=std::max(lo-i, 0); j<=std::min(deg_g, hi-i); ++j){
res[i+j-lo] += f[i]*g[j];
}
}
return res;
}
template<typename RingT> struct FpsMulAssign{
std::vector<RingT> &operator()(std::vector<RingT> &f, std::vector<RingT> const &g){
int d1 = fps_deg(f), d2 = fps_deg(g);
if(d1==-1 || d2==-1){
f.clear(); return f;
}
if(d1<32 || d2<32){
return f = fps_naive_mul(f.data(), d1, g.data(), d2, 0, d1+d2);
}
int n = std::max(d1, d2)/2+1;
auto mid = f.cbegin() + std::min(n, (int)f.size());
std::vector<RingT> a(f.cbegin(), mid), b(mid, f.cend());
mid = g.cbegin() + std::min(n, (int)g.size());
std::vector<RingT> c(g.cbegin(), mid), d(mid, g.cend());
auto ac = a; (*this)(ac, c);
auto bd = b; (*this)(bd, d);
(*this)(fps_add_assign(a, b), fps_add_assign(c, d));
fps_sub_assign(fps_sub_assign(a, ac), bd);
bd.insert(bd.cbegin(), 2*n, 0);
int da = fps_deg(a);
if((int)bd.size() < n+da+1){
bd.resize(n+da+1);
}
for(int i=0; i<=da; ++i){
bd[n+i] += a[i];
}
return f = move(fps_add_assign(bd, ac));
}
};
template<U32 N> struct FpsMulAssign<RingZn<N>>{
std::vector<RingZn<N>> &operator()(std::vector<RingZn<N>> &f, std::vector<RingZn<N>> const &g){
int d1 = fps_deg(f), d2 = fps_deg(g);
if(d1==-1 || d2==-1){
f.clear(); return f;
}
f.resize(d1+d2+1);
convolution(f.cbegin(), f.cbegin()+d1+1, g.cbegin(), g.cbegin()+d2+1, d1+d2+1, f.begin());
return f;
}
};
#define RECIPROCAL_LIFTING(T) do{\
std::vector<T> two{2};\
std::vector<T> first_2i(f.cbegin(), f.cbegin()+std::min(2*i, (int)f.size()));\
FpsMulAssign<T> mul_assign;\
mul_assign(res, fps_sub_assign(two, mul_assign(first_2i, res)));\
if((int)res.size() > 2*i) res.resize(2*i);\
}while(0)
template<typename RingT> struct FpsReciprocal{
std::vector<RingT> operator()(std::vector<RingT> const &f, int n_terms){
if(n_terms <= 0) return {};
std::vector<RingT> res{1/f[0]};
for(int i=1; i<n_terms; i*=2){
RECIPROCAL_LIFTING(RingT);
}
if((int)res.size() > n_terms) res.resize(n_terms);
return res;
}
};
template<U32 N> struct FpsReciprocal<RingZn<N>>{
std::vector<RingZn<N>> operator()(std::vector<RingZn<N>> const &f, int n_terms){
if(n_terms <= 0) return {};
std::vector<RingZn<N>> res{1/f[0]};
for(int i=1; i<n_terms; i*=2){
if(primality_test(N) && 4*i<=Ntt<N>::max_size()){
std::vector<RingZn<N>> g(f.cbegin(), f.cbegin()+std::min(2*i, (int)f.size()));
res.resize(4*i); g.resize(4*i);
Ntt<N>::transform_in_place(false, res.begin(), 4*i);
Ntt<N>::transform_in_place(false, g.begin(), 4*i);
for(int j=0; j<4*i; ++j){
res[j] *= 2-g[j]*res[j];
}
Ntt<N>::transform_in_place(true, res.begin(), 4*i);
res.resize(2*i); continue;
}
RECIPROCAL_LIFTING(RingZn<N>);
}
return res;
}
};
#undef RECIPROCAL_LIFTING
} // namespace impl
template<typename RingT> struct BasicFps: private std::vector<RingT>{
using Base = std::vector<RingT>;
using Base::capacity;
using Base::clear;
using Base::get_allocator;
using Base::reserve;
using Base::resize;
using Base::shrink_to_fit;
using Base::size;
BasicFps() = default;
template<typename INT, typename = std::enable_if_t<std::is_integral_v<INT>>>
constexpr BasicFps(INT c): BasicFps(static_cast<RingT>(c)){}
BasicFps(RingT const &c): BasicFps(c==0? Base{}: Base{c}){}
BasicFps(std::initializer_list<RingT> coef): BasicFps(Base(coef)){}
template<typename It> BasicFps(It b, It e): BasicFps(Base(b, e)){}
explicit BasicFps(Base coef): Base(move(coef)){}
std::vector<RingT> into_vec(){
std::vector<RingT> res;
res.swap(*this);
return res;
}
int deg() const noexcept{
return impl::fps_deg(*this);
}
explicit operator bool() const noexcept{
return deg() >= 0;
}
RingT coef(int i) const noexcept{
return i>=(int)size()? 0: (*this)[i];
}
template<typename T> BasicFps &set_coef(int i, T &&ci){
if(i >= (int)size()) resize(i+1);
(*this)[i] = std::forward<T>(ci);
return *this;
}
RingT operator()(RingT const &x) const noexcept{
RingT res = 0;
for(size_t i=size(); i-->0; ){
res = x*res + (*this)[i];
}
return res;
}
BasicFps &operator<<=(int n_terms){
this->insert(this->cbegin(), n_terms, 0);
return *this;
}
BasicFps operator<<(int n_terms) const{
auto res = *this;
return res <<= n_terms;
}
BasicFps &operator>>=(int n_terms){
erase(this->cbegin(), this->cbegin()+std::min(n_terms, (int)size()));
return *this;
}
BasicFps operator>>(int n_terms) const{
if(n_terms < (int)size()){
return BasicFps(this->cbegin()+n_terms, this->cend());
}
return {};
}
BasicFps &truncate(int n_terms) noexcept{
if(n_terms < (int)size()){
resize(n_terms);
}
return *this;
}
BasicFps first_n_terms(int n) const{
return BasicFps(this->cbegin(), this->cbegin()+std::min(n, (int)size()));
}
BasicFps rev() const{
int d = deg();
std::vector<RingT> res;
res.reserve(d+1);
reverse_copy(this->cbegin(), this->cbegin()+(d+1), back_inserter(res));
return BasicFps(move(res));
}
BasicFps operator+() const{
return *this;
}
BasicFps operator-() const{
auto res = *this;
transform(res.begin(), res.end(), res.begin(), std::negate<RingT>());
return res;
}
BasicFps &operator+=(BasicFps const &rhs){
impl::fps_add_assign(*this, rhs);
return *this;
}
BasicFps &operator-=(BasicFps const &rhs){
impl::fps_sub_assign(*this, rhs);
return *this;
}
BasicFps &operator*=(BasicFps const &rhs){
impl::FpsMulAssign<RingT>()(*this, rhs);
return *this;
}
BasicFps reciprocal(int n_terms) const{
return BasicFps(impl::FpsReciprocal<RingT>()(*this, n_terms));
}
BasicFps &operator/=(BasicFps const &rhs){
int n = deg(), d = rhs.deg();
if(d<32 || n-d<32){
resize(n+1);
return *this = BasicFps(naive_division(std::move(*this), rhs.data(), d).first);
}
auto rf = rev(), rg = rhs.rev();
*this = rf.truncate(n-d+1)*rg.reciprocal(n-d+1);
resize(n-d+1);
reverse(this->begin(), this->end());
return *this;
}
BasicFps &operator%=(BasicFps const &rhs){
int n = deg(), d = rhs.deg();
if(d<32 || n-d<32){
resize(n+1);
return *this = BasicFps(naive_division(std::move(*this), rhs.data(), d).second);
}
auto q = *this;
q /= rhs; q *= rhs;
truncate(d); q.truncate(d);
return *this -= q;
}
BasicFps derivative() const{
BasicFps res;
if(int d=deg(); d>0){
res.resize(d);
for(int i=1; i<=d; ++i){
res[i-1] = i*(*this)[i];
}
}
return res;
}
BasicFps integral() const{
BasicFps res;
if(int d=deg(); d>=0){
res.resize(d+2);
for(int i=0; i<=d; ++i){
res[i+1] = (*this)[i]/(i+1);
}
}
return res;
}
#define DEF_BIOP(op)\
friend BasicFps operator op(BasicFps lhs, BasicFps const &rhs){\
return lhs op##= rhs;\
}
DEF_BIOP(+)
DEF_BIOP(-)
DEF_BIOP(*)
DEF_BIOP(/)
DEF_BIOP(%)
#undef DEF_BIOP
friend bool operator==(BasicFps const &lhs, BasicFps const &rhs){
int d = lhs.deg();
return rhs.deg()==d && equal(lhs.cbegin(), lhs.cbegin()+(d+1), rhs.cbegin());
}
friend bool operator!=(BasicFps const &lhs, BasicFps const &rhs){
return !(lhs == rhs);
}
private:
static std::pair<std::vector<RingT>, std::vector<RingT>> naive_division(std::vector<RingT> f, RingT const *g, int deg_g){
int deg_f = (int)f.size()-1;
if(deg_f < deg_g){
return {{}, move(f)};
}
std::vector<RingT> q(deg_f-deg_g+1);
for(int i=deg_f-deg_g; i>=0; --i){
RingT qi = f[i+deg_g]/g[deg_g];
f[i+deg_g] = 0;
for(int j=0; j<deg_g; ++j){
f[i+j] -= qi*g[j];
}
q[i] = qi;
}
f.resize(deg_g);
return {move(q), move(f)};
}
};
template<U32 N> using FpsMod = BasicFps<RingZn<N>>;
template<typename RingT>
BasicFps<RingT> log_fps(BasicFps<RingT> const &f, int n_terms){
// assert(f.coef(0) == 1);
if(n_terms <= 0) return {};
return (f.derivative()*f.reciprocal(n_terms-1)).truncate(n_terms-1).integral();
}
template<typename RingT>
BasicFps<RingT> exp_fps(BasicFps<RingT> const &f, int n_terms){
// assert(f.coef(0) == 0);
if(n_terms <= 0) return {};
if(n_terms == 1) return 1;
auto g = exp_fps(f.first_n_terms((n_terms+1)/2), (n_terms+1)/2);
return (g*(1-log_fps(g, n_terms)+f)).truncate(n_terms);
}
template<typename RingT>
RingT pow_fps(RingT const &f, I64 n, int n_terms){
// assert(n >= 0);
if(n_terms <= 0) return {};
if(n == 0) return 1;
int d = f.deg();
if(d == -1) return {};
int low = 0;
while(!f.coef(low)) ++low;
if((n_terms-1)/n < low) return {};
int m = n_terms-n*low;
auto res = f >> low;
res /= f.coef(low);
res = exp_fps(n*log_fps(res, m), m);
res *= pow(f.coef(low), n);
return res <<= n*low;
}
template<U32 P>
std::optional<FpsMod<P>> sqrt_fps(FpsMod<P> const &f, int n_terms){
int d = f.deg();
if(d == -1) return {};
int low = 0;
while(!f.coef(low)) ++low;
I64 c0;
if(low % 2 || (c0 = sqrt_modp((U32)f.coef(low), P)) == -1){
return {};
}
FpsMod<P> g = f>>low, res = c0;
for(int i=1; i<n_terms-low/2; i*=2){
res = (res+g.first_n_terms(2*i)*res.reciprocal(2*i))/2;
}
return res << low/2;
}
#undef U64
#undef I64
#undef U32
#endif
#include<vector>
#include<cstdio>
using namespace std;
constexpr int P = 998'244'353;
void st_build(RingZn<P> const *x, FpsMod<P> *st, int i, int l, int r){
if(l == r){
st[i] = {-x[l], 1};
return;
}
int m = l+(r-l)/2;
st_build(x, st, 2*i, l, m);
st_build(x, st, 2*i+1, m+1, r);
st[i] = st[2*i] * st[2*i+1];
}
void st_eval(RingZn<P> const *x, FpsMod<P> const *st, FpsMod<P> f, int i, int l, int r, RingZn<P> *res){
if(r-l < 32){
for(int j=l; j<=r; ++j){
res[j] = f(x[j]);
}
return;
}
int m = l+(r-l)/2;
st_eval(x, st, f%st[2*i], 2*i, l, m, res);
st_eval(x, st, move(f%=st[2*i+1]), 2*i+1, m+1, r, res);
}
int main(){
int n, m;
scanf("%d%d", &n, &m);
vector<RingZn<P>> coef(n);
for(int i=0; i<n; ++i){
int ci; scanf("%d", &ci);
coef[i] = ci;
}
FpsMod<P> f(move(coef));
vector<RingZn<P>> x(m);
for(int i=0; i<m; ++i){
int xi; scanf("%d", &xi);
x[i] = xi;
}
vector<FpsMod<P>> st(4*m);
st_build(x.data(), st.data(), 1, 0, m-1);
vector<RingZn<P>> ans(m);
st_eval(x.data(), st.data(), move(f%=st[1]), 1, 0, m-1, ans.data());
for(int i=0; i<m; ++i){
printf("%d%c", (int)ans[i], " \n"[i==m-1]);
}
}